In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
import itertools
In [2]:
application_data = pd.read_csv(r'application_data.csv')
previous_application = pd.read_csv(r'previous_application.csv')
columns_description = pd.read_csv(r'columns_description.csv',skiprows=1)
In [3]:
print ("application_data     :",application_data.shape)
print ("previous_application :",previous_application.shape)
print ("columns_description  :",columns_description.shape)
application_data     : (307511, 122)
previous_application : (1670214, 37)
columns_description  : (159, 5)
In [4]:
pd.set_option("display.max_rows", None, "display.max_columns", None)
display("application_data")
display(application_data.head(3))
'application_data'
SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR
0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 -9461 -637 -3648.0 -2120 NaN 1 1 0 1 1 0 Laborers 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 0.083037 0.262949 0.139376 0.0247 0.0369 0.9722 0.6192 0.0143 0.00 0.0690 0.0833 0.1250 0.0369 0.0202 0.0190 0.0000 0.0000 0.0252 0.0383 0.9722 0.6341 0.0144 0.0000 0.0690 0.0833 0.1250 0.0377 0.022 0.0198 0.0 0.0 0.0250 0.0369 0.9722 0.6243 0.0144 0.00 0.0690 0.0833 0.1250 0.0375 0.0205 0.0193 0.0000 0.00 reg oper account block of flats 0.0149 Stone, brick No 2.0 2.0 2.0 2.0 -1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0
1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 -16765 -1188 -1186.0 -291 NaN 1 1 0 1 1 0 Core staff 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 0.311267 0.622246 NaN 0.0959 0.0529 0.9851 0.7960 0.0605 0.08 0.0345 0.2917 0.3333 0.0130 0.0773 0.0549 0.0039 0.0098 0.0924 0.0538 0.9851 0.8040 0.0497 0.0806 0.0345 0.2917 0.3333 0.0128 0.079 0.0554 0.0 0.0 0.0968 0.0529 0.9851 0.7987 0.0608 0.08 0.0345 0.2917 0.3333 0.0132 0.0787 0.0558 0.0039 0.01 reg oper account block of flats 0.0714 Block No 1.0 0.0 1.0 0.0 -828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
2 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 -19046 -225 -4260.0 -2531 26.0 1 1 1 1 1 0 Laborers 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government NaN 0.555912 0.729567 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
In [5]:
display("previous_application ")
display(previous_application.head(3))
'previous_application '
SK_ID_PREV SK_ID_CURR NAME_CONTRACT_TYPE AMT_ANNUITY AMT_APPLICATION AMT_CREDIT AMT_DOWN_PAYMENT AMT_GOODS_PRICE WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY RATE_DOWN_PAYMENT RATE_INTEREST_PRIMARY RATE_INTEREST_PRIVILEGED NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL
0 2030495 271877 Consumer loans 1730.430 17145.0 17145.0 0.0 17145.0 SATURDAY 15 Y 1 0.0 0.182832 0.867336 XAP Approved -73 Cash through the bank XAP NaN Repeater Mobile POS XNA Country-wide 35 Connectivity 12.0 middle POS mobile with interest 365243.0 -42.0 300.0 -42.0 -37.0 0.0
1 2802425 108129 Cash loans 25188.615 607500.0 679671.0 NaN 607500.0 THURSDAY 11 Y 1 NaN NaN NaN XNA Approved -164 XNA XAP Unaccompanied Repeater XNA Cash x-sell Contact center -1 XNA 36.0 low_action Cash X-Sell: low 365243.0 -134.0 916.0 365243.0 365243.0 1.0
2 2523466 122040 Cash loans 15060.735 112500.0 136444.5 NaN 112500.0 TUESDAY 11 Y 1 NaN NaN NaN XNA Approved -301 Cash through the bank XAP Spouse, partner Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 high Cash X-Sell: high 365243.0 -271.0 59.0 365243.0 365243.0 1.0
In [6]:
display("columns_description")
pd.set_option('display.max_colwidth', -1)
columns_description=columns_description.drop(['1'],axis=1)
display(columns_description)
'columns_description'
application_data SK_ID_CURR ID of loan in our sample Unnamed: 4
0 application_data TARGET Target variable (1 - client with payment difficulties: he/she had late payment more than X days on at least one of the first Y installments of the loan in our sample, 0 - all other cases) NaN
1 application_data NAME_CONTRACT_TYPE Identification if loan is cash or revolving NaN
2 application_data CODE_GENDER Gender of the client NaN
3 application_data FLAG_OWN_CAR Flag if the client owns a car NaN
4 application_data FLAG_OWN_REALTY Flag if client owns a house or flat NaN
5 application_data CNT_CHILDREN Number of children the client has NaN
6 application_data AMT_INCOME_TOTAL Income of the client NaN
7 application_data AMT_CREDIT Credit amount of the loan NaN
8 application_data AMT_ANNUITY Loan annuity NaN
9 application_data AMT_GOODS_PRICE For consumer loans it is the price of the goods for which the loan is given NaN
10 application_data NAME_TYPE_SUITE Who was accompanying client when he was applying for the loan NaN
11 application_data NAME_INCOME_TYPE Clients income type (businessman, working, maternity leave,…) NaN
12 application_data NAME_EDUCATION_TYPE Level of highest education the client achieved NaN
13 application_data NAME_FAMILY_STATUS Family status of the client NaN
14 application_data NAME_HOUSING_TYPE What is the housing situation of the client (renting, living with parents, ...) NaN
15 application_data REGION_POPULATION_RELATIVE Normalized population of region where client lives (higher number means the client lives in more populated region) normalized
16 application_data DAYS_BIRTH Client's age in days at the time of application time only relative to the application
17 application_data DAYS_EMPLOYED How many days before the application the person started current employment time only relative to the application
18 application_data DAYS_REGISTRATION How many days before the application did client change his registration time only relative to the application
19 application_data DAYS_ID_PUBLISH How many days before the application did client change the identity document with which he applied for the loan time only relative to the application
20 application_data OWN_CAR_AGE Age of client's car NaN
21 application_data FLAG_MOBIL Did client provide mobile phone (1=YES, 0=NO) NaN
22 application_data FLAG_EMP_PHONE Did client provide work phone (1=YES, 0=NO) NaN
23 application_data FLAG_WORK_PHONE Did client provide home phone (1=YES, 0=NO) NaN
24 application_data FLAG_CONT_MOBILE Was mobile phone reachable (1=YES, 0=NO) NaN
25 application_data FLAG_PHONE Did client provide home phone (1=YES, 0=NO) NaN
26 application_data FLAG_EMAIL Did client provide email (1=YES, 0=NO) NaN
27 application_data OCCUPATION_TYPE What kind of occupation does the client have NaN
28 application_data CNT_FAM_MEMBERS How many family members does client have NaN
29 application_data REGION_RATING_CLIENT Our rating of the region where client lives (1,2,3) NaN
30 application_data REGION_RATING_CLIENT_W_CITY Our rating of the region where client lives with taking city into account (1,2,3) NaN
31 application_data WEEKDAY_APPR_PROCESS_START On which day of the week did the client apply for the loan NaN
32 application_data HOUR_APPR_PROCESS_START Approximately at what hour did the client apply for the loan rounded
33 application_data REG_REGION_NOT_LIVE_REGION Flag if client's permanent address does not match contact address (1=different, 0=same, at region level) NaN
34 application_data REG_REGION_NOT_WORK_REGION Flag if client's permanent address does not match work address (1=different, 0=same, at region level) NaN
35 application_data LIVE_REGION_NOT_WORK_REGION Flag if client's contact address does not match work address (1=different, 0=same, at region level) NaN
36 application_data REG_CITY_NOT_LIVE_CITY Flag if client's permanent address does not match contact address (1=different, 0=same, at city level) NaN
37 application_data REG_CITY_NOT_WORK_CITY Flag if client's permanent address does not match work address (1=different, 0=same, at city level) NaN
38 application_data LIVE_CITY_NOT_WORK_CITY Flag if client's contact address does not match work address (1=different, 0=same, at city level) NaN
39 application_data ORGANIZATION_TYPE Type of organization where client works NaN
40 application_data EXT_SOURCE_1 Normalized score from external data source normalized
41 application_data EXT_SOURCE_2 Normalized score from external data source normalized
42 application_data EXT_SOURCE_3 Normalized score from external data source normalized
43 application_data APARTMENTS_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
44 application_data BASEMENTAREA_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
45 application_data YEARS_BEGINEXPLUATATION_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
46 application_data YEARS_BUILD_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
47 application_data COMMONAREA_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
48 application_data ELEVATORS_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
49 application_data ENTRANCES_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
50 application_data FLOORSMAX_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
51 application_data FLOORSMIN_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
52 application_data LANDAREA_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
53 application_data LIVINGAPARTMENTS_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
54 application_data LIVINGAREA_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
55 application_data NONLIVINGAPARTMENTS_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
56 application_data NONLIVINGAREA_AVG Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
57 application_data APARTMENTS_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
58 application_data BASEMENTAREA_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
59 application_data YEARS_BEGINEXPLUATATION_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
60 application_data YEARS_BUILD_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
61 application_data COMMONAREA_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
62 application_data ELEVATORS_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
63 application_data ENTRANCES_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
64 application_data FLOORSMAX_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
65 application_data FLOORSMIN_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
66 application_data LANDAREA_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
67 application_data LIVINGAPARTMENTS_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
68 application_data LIVINGAREA_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
69 application_data NONLIVINGAPARTMENTS_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
70 application_data NONLIVINGAREA_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
71 application_data APARTMENTS_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
72 application_data BASEMENTAREA_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
73 application_data YEARS_BEGINEXPLUATATION_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
74 application_data YEARS_BUILD_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
75 application_data COMMONAREA_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
76 application_data ELEVATORS_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
77 application_data ENTRANCES_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
78 application_data FLOORSMAX_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
79 application_data FLOORSMIN_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
80 application_data LANDAREA_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
81 application_data LIVINGAPARTMENTS_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
82 application_data LIVINGAREA_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
83 application_data NONLIVINGAPARTMENTS_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
84 application_data NONLIVINGAREA_MEDI Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
85 application_data FONDKAPREMONT_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
86 application_data HOUSETYPE_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
87 application_data TOTALAREA_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
88 application_data WALLSMATERIAL_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
89 application_data EMERGENCYSTATE_MODE Normalized information about building where the client lives, What is average (_AVG suffix), modus (_MODE suffix), median (_MEDI suffix) apartment size, common area, living area, age of building, number of elevators, number of entrances, state of the building, number of floor normalized
90 application_data OBS_30_CNT_SOCIAL_CIRCLE How many observation of client's social surroundings with observable 30 DPD (days past due) default NaN
91 application_data DEF_30_CNT_SOCIAL_CIRCLE How many observation of client's social surroundings defaulted on 30 DPD (days past due) NaN
92 application_data OBS_60_CNT_SOCIAL_CIRCLE How many observation of client's social surroundings with observable 60 DPD (days past due) default NaN
93 application_data DEF_60_CNT_SOCIAL_CIRCLE How many observation of client's social surroundings defaulted on 60 (days past due) DPD NaN
94 application_data DAYS_LAST_PHONE_CHANGE How many days before application did client change phone NaN
95 application_data FLAG_DOCUMENT_2 Did client provide document 2 NaN
96 application_data FLAG_DOCUMENT_3 Did client provide document 3 NaN
97 application_data FLAG_DOCUMENT_4 Did client provide document 4 NaN
98 application_data FLAG_DOCUMENT_5 Did client provide document 5 NaN
99 application_data FLAG_DOCUMENT_6 Did client provide document 6 NaN
100 application_data FLAG_DOCUMENT_7 Did client provide document 7 NaN
101 application_data FLAG_DOCUMENT_8 Did client provide document 8 NaN
102 application_data FLAG_DOCUMENT_9 Did client provide document 9 NaN
103 application_data FLAG_DOCUMENT_10 Did client provide document 10 NaN
104 application_data FLAG_DOCUMENT_11 Did client provide document 11 NaN
105 application_data FLAG_DOCUMENT_12 Did client provide document 12 NaN
106 application_data FLAG_DOCUMENT_13 Did client provide document 13 NaN
107 application_data FLAG_DOCUMENT_14 Did client provide document 14 NaN
108 application_data FLAG_DOCUMENT_15 Did client provide document 15 NaN
109 application_data FLAG_DOCUMENT_16 Did client provide document 16 NaN
110 application_data FLAG_DOCUMENT_17 Did client provide document 17 NaN
111 application_data FLAG_DOCUMENT_18 Did client provide document 18 NaN
112 application_data FLAG_DOCUMENT_19 Did client provide document 19 NaN
113 application_data FLAG_DOCUMENT_20 Did client provide document 20 NaN
114 application_data FLAG_DOCUMENT_21 Did client provide document 21 NaN
115 application_data AMT_REQ_CREDIT_BUREAU_HOUR Number of enquiries to Credit Bureau about the client one hour before application NaN
116 application_data AMT_REQ_CREDIT_BUREAU_DAY Number of enquiries to Credit Bureau about the client one day before application (excluding one hour before application) NaN
117 application_data AMT_REQ_CREDIT_BUREAU_WEEK Number of enquiries to Credit Bureau about the client one week before application (excluding one day before application) NaN
118 application_data AMT_REQ_CREDIT_BUREAU_MON Number of enquiries to Credit Bureau about the client one month before application (excluding one week before application) NaN
119 application_data AMT_REQ_CREDIT_BUREAU_QRT Number of enquiries to Credit Bureau about the client 3 month before application (excluding one month before application) NaN
120 application_data AMT_REQ_CREDIT_BUREAU_YEAR Number of enquiries to Credit Bureau about the client one day year (excluding last 3 months before application) NaN
121 previous_application.csv SK_ID_PREV ID of previous credit in Home credit related to loan in our sample. (One loan in our sample can have 0,1,2 or more previous loan applications in Home Credit, previous application could, but not necessarily have to lead to credit) hashed
122 previous_application.csv SK_ID_CURR ID of loan in our sample hashed
123 previous_application.csv NAME_CONTRACT_TYPE Contract product type (Cash loan, consumer loan [POS] ,...) of the previous application NaN
124 previous_application.csv AMT_ANNUITY Annuity of previous application NaN
125 previous_application.csv AMT_APPLICATION For how much credit did client ask on the previous application NaN
126 previous_application.csv AMT_CREDIT Final credit amount on the previous application. This differs from AMT_APPLICATION in a way that the AMT_APPLICATION is the amount for which the client initially applied for, but during our approval process he could have received different amount - AMT_CREDIT NaN
127 previous_application.csv AMT_DOWN_PAYMENT Down payment on the previous application NaN
128 previous_application.csv AMT_GOODS_PRICE Goods price of good that client asked for (if applicable) on the previous application NaN
129 previous_application.csv WEEKDAY_APPR_PROCESS_START On which day of the week did the client apply for previous application NaN
130 previous_application.csv HOUR_APPR_PROCESS_START Approximately at what day hour did the client apply for the previous application rounded
131 previous_application.csv FLAG_LAST_APPL_PER_CONTRACT Flag if it was last application for the previous contract. Sometimes by mistake of client or our clerk there could be more applications for one single contract NaN
132 previous_application.csv NFLAG_LAST_APPL_IN_DAY Flag if the application was the last application per day of the client. Sometimes clients apply for more applications a day. Rarely it could also be error in our system that one application is in the database twice NaN
133 previous_application.csv NFLAG_MICRO_CASH Flag Micro finance loan NaN
134 previous_application.csv RATE_DOWN_PAYMENT Down payment rate normalized on previous credit normalized
135 previous_application.csv RATE_INTEREST_PRIMARY Interest rate normalized on previous credit normalized
136 previous_application.csv RATE_INTEREST_PRIVILEGED Interest rate normalized on previous credit normalized
137 previous_application.csv NAME_CASH_LOAN_PURPOSE Purpose of the cash loan NaN
138 previous_application.csv NAME_CONTRACT_STATUS Contract status (approved, cancelled, ...) of previous application NaN
139 previous_application.csv DAYS_DECISION Relative to current application when was the decision about previous application made time only relative to the application
140 previous_application.csv NAME_PAYMENT_TYPE Payment method that client chose to pay for the previous application NaN
141 previous_application.csv CODE_REJECT_REASON Why was the previous application rejected NaN
142 previous_application.csv NAME_TYPE_SUITE Who accompanied client when applying for the previous application NaN
143 previous_application.csv NAME_CLIENT_TYPE Was the client old or new client when applying for the previous application NaN
144 previous_application.csv NAME_GOODS_CATEGORY What kind of goods did the client apply for in the previous application NaN
145 previous_application.csv NAME_PORTFOLIO Was the previous application for CASH, POS, CAR, … NaN
146 previous_application.csv NAME_PRODUCT_TYPE Was the previous application x-sell o walk-in NaN
147 previous_application.csv CHANNEL_TYPE Through which channel we acquired the client on the previous application NaN
148 previous_application.csv SELLERPLACE_AREA Selling area of seller place of the previous application NaN
149 previous_application.csv NAME_SELLER_INDUSTRY The industry of the seller NaN
150 previous_application.csv CNT_PAYMENT Term of previous credit at application of the previous application NaN
151 previous_application.csv NAME_YIELD_GROUP Grouped interest rate into small medium and high of the previous application grouped
152 previous_application.csv PRODUCT_COMBINATION Detailed product combination of the previous application NaN
153 previous_application.csv DAYS_FIRST_DRAWING Relative to application date of current application when was the first disbursement of the previous application time only relative to the application
154 previous_application.csv DAYS_FIRST_DUE Relative to application date of current application when was the first due supposed to be of the previous application time only relative to the application
155 previous_application.csv DAYS_LAST_DUE_1ST_VERSION Relative to application date of current application when was the first due of the previous application time only relative to the application
156 previous_application.csv DAYS_LAST_DUE Relative to application date of current application when was the last due date of the previous application time only relative to the application
157 previous_application.csv DAYS_TERMINATION Relative to application date of current application when was the expected termination of the previous application time only relative to the application
158 previous_application.csv NFLAG_INSURED_ON_APPROVAL Did the client requested insurance during the previous application NaN
In [7]:
fig = plt.figure(figsize=(18,6))
miss_previous_application = pd.DataFrame((previous_application.isnull().sum())*100/previous_application.shape[0]).reset_index()
miss_previous_application["type"] = "previous_application"
ax = sns.pointplot("index",0,data=miss_previous_application,hue="type")
plt.xticks(rotation =90,fontsize =7)
plt.title("Percentage of Missing values in previous_application")
plt.ylabel("PERCENTAGE")
plt.xlabel("COLUMNS")
ax.set_facecolor("k")
fig.set_facecolor("lightgrey")
In [8]:
round(100*(previous_application.isnull().sum()/len(previous_application.index)),2)
Out[8]:
SK_ID_PREV                     0.00 
SK_ID_CURR                     0.00 
NAME_CONTRACT_TYPE             0.00 
AMT_ANNUITY                    22.29
AMT_APPLICATION                0.00 
AMT_CREDIT                     0.00 
AMT_DOWN_PAYMENT               53.64
AMT_GOODS_PRICE                23.08
WEEKDAY_APPR_PROCESS_START     0.00 
HOUR_APPR_PROCESS_START        0.00 
FLAG_LAST_APPL_PER_CONTRACT    0.00 
NFLAG_LAST_APPL_IN_DAY         0.00 
RATE_DOWN_PAYMENT              53.64
RATE_INTEREST_PRIMARY          99.64
RATE_INTEREST_PRIVILEGED       99.64
NAME_CASH_LOAN_PURPOSE         0.00 
NAME_CONTRACT_STATUS           0.00 
DAYS_DECISION                  0.00 
NAME_PAYMENT_TYPE              0.00 
CODE_REJECT_REASON             0.00 
NAME_TYPE_SUITE                49.12
NAME_CLIENT_TYPE               0.00 
NAME_GOODS_CATEGORY            0.00 
NAME_PORTFOLIO                 0.00 
NAME_PRODUCT_TYPE              0.00 
CHANNEL_TYPE                   0.00 
SELLERPLACE_AREA               0.00 
NAME_SELLER_INDUSTRY           0.00 
CNT_PAYMENT                    22.29
NAME_YIELD_GROUP               0.00 
PRODUCT_COMBINATION            0.02 
DAYS_FIRST_DRAWING             40.30
DAYS_FIRST_DUE                 40.30
DAYS_LAST_DUE_1ST_VERSION      40.30
DAYS_LAST_DUE                  40.30
DAYS_TERMINATION               40.30
NFLAG_INSURED_ON_APPROVAL      40.30
dtype: float64
In [9]:
previous_application=previous_application.drop([ 'AMT_DOWN_PAYMENT', 'RATE_DOWN_PAYMENT', 'RATE_INTEREST_PRIMARY',
       "RATE_INTEREST_PRIVILEGED"],axis=1)
In [10]:
fig = plt.figure(figsize=(18,6))
miss_previous_application = pd.DataFrame((previous_application.isnull().sum())*100/previous_application.shape[0]).reset_index()
miss_previous_application["type"] = "previous_application"
ax = sns.pointplot("index",0,data=miss_previous_application,hue="type")
plt.xticks(rotation =90,fontsize =7)
plt.title("Percentage of Missing values in previous_application")
plt.ylabel("PERCENTAGE")
plt.xlabel("COLUMNS")
ax.set_facecolor("k")
fig.set_facecolor("lightgrey")
In [11]:
round(100*(previous_application.isnull().sum()/len(previous_application.index)),2)
Out[11]:
SK_ID_PREV                     0.00 
SK_ID_CURR                     0.00 
NAME_CONTRACT_TYPE             0.00 
AMT_ANNUITY                    22.29
AMT_APPLICATION                0.00 
AMT_CREDIT                     0.00 
AMT_GOODS_PRICE                23.08
WEEKDAY_APPR_PROCESS_START     0.00 
HOUR_APPR_PROCESS_START        0.00 
FLAG_LAST_APPL_PER_CONTRACT    0.00 
NFLAG_LAST_APPL_IN_DAY         0.00 
NAME_CASH_LOAN_PURPOSE         0.00 
NAME_CONTRACT_STATUS           0.00 
DAYS_DECISION                  0.00 
NAME_PAYMENT_TYPE              0.00 
CODE_REJECT_REASON             0.00 
NAME_TYPE_SUITE                49.12
NAME_CLIENT_TYPE               0.00 
NAME_GOODS_CATEGORY            0.00 
NAME_PORTFOLIO                 0.00 
NAME_PRODUCT_TYPE              0.00 
CHANNEL_TYPE                   0.00 
SELLERPLACE_AREA               0.00 
NAME_SELLER_INDUSTRY           0.00 
CNT_PAYMENT                    22.29
NAME_YIELD_GROUP               0.00 
PRODUCT_COMBINATION            0.02 
DAYS_FIRST_DRAWING             40.30
DAYS_FIRST_DUE                 40.30
DAYS_LAST_DUE_1ST_VERSION      40.30
DAYS_LAST_DUE                  40.30
DAYS_TERMINATION               40.30
NFLAG_INSURED_ON_APPROVAL      40.30
dtype: float64
In [12]:
print("AMT_ANNUITY NULL COUNT:" ,previous_application['AMT_ANNUITY'].isnull().sum())
AMT_ANNUITY NULL COUNT: 372235
In [13]:
previous_application['AMT_ANNUITY'].describe()
Out[13]:
count    1.297979e+06
mean     1.595512e+04
std      1.478214e+04
min      0.000000e+00
25%      6.321780e+03
50%      1.125000e+04
75%      2.065842e+04
max      4.180581e+05
Name: AMT_ANNUITY, dtype: float64
In [14]:
sns.set_style('whitegrid') 
sns.distplot(previous_application['AMT_ANNUITY']) 
plt.show()
In [15]:
print("AMT_GOODS_PRICE NULL COUNT:" ,previous_application['AMT_GOODS_PRICE'].isnull().sum())
AMT_GOODS_PRICE NULL COUNT: 385515
In [16]:
previous_application['AMT_GOODS_PRICE'].describe()
Out[16]:
count    1.284699e+06
mean     2.278473e+05
std      3.153966e+05
min      0.000000e+00
25%      5.084100e+04
50%      1.123200e+05
75%      2.340000e+05
max      6.905160e+06
Name: AMT_GOODS_PRICE, dtype: float64
In [17]:
sns.set_style('whitegrid') 
sns.distplot(previous_application['AMT_GOODS_PRICE']) 
plt.show()
In [18]:
print("NAME_TYPE_SUITE NULL COUNT:" ,previous_application['NAME_TYPE_SUITE'].isnull().sum())
NAME_TYPE_SUITE NULL COUNT: 820405
In [19]:
previous_application['NAME_TYPE_SUITE'].value_counts()
Out[19]:
Unaccompanied      508970
Family             213263
Spouse, partner    67069 
Children           31566 
Other_B            17624 
Other_A            9077  
Group of people    2240  
Name: NAME_TYPE_SUITE, dtype: int64
In [20]:
print("CNT_PAYMENT NULL COUNT:" ,previous_application['CNT_PAYMENT'].isnull().sum())
CNT_PAYMENT NULL COUNT: 372230
In [21]:
previous_application['CNT_PAYMENT'].describe()
Out[21]:
count    1.297984e+06
mean     1.605408e+01
std      1.456729e+01
min      0.000000e+00
25%      6.000000e+00
50%      1.200000e+01
75%      2.400000e+01
max      8.400000e+01
Name: CNT_PAYMENT, dtype: float64
In [22]:
sns.set_style('whitegrid') 
sns.boxplot(previous_application['CNT_PAYMENT']) 
plt.show()
In [23]:
print("DAYS_FIRST_DRAWING :" ,previous_application['CNT_PAYMENT'].isnull().sum())
DAYS_FIRST_DRAWING : 372230
In [24]:
previous_application['DAYS_FIRST_DRAWING'].describe()
Out[24]:
count    997149.000000
mean     342209.855039
std      88916.115834 
min     -2922.000000  
25%      365243.000000
50%      365243.000000
75%      365243.000000
max      365243.000000
Name: DAYS_FIRST_DRAWING, dtype: float64
In [25]:
sns.set_style('whitegrid') 
sns.boxplot(previous_application['DAYS_FIRST_DRAWING']) 
plt.show()
In [26]:
print("DAYS_FIRST_DUE :" ,previous_application['DAYS_FIRST_DUE'].isnull().sum())
DAYS_FIRST_DUE : 673065
In [27]:
previous_application['DAYS_FIRST_DUE'].describe()
Out[27]:
count    997149.000000
mean     13826.269337 
std      72444.869708 
min     -2892.000000  
25%     -1628.000000  
50%     -831.000000   
75%     -411.000000   
max      365243.000000
Name: DAYS_FIRST_DUE, dtype: float64
In [28]:
sns.set_style('whitegrid') 
sns.boxplot(previous_application['DAYS_FIRST_DUE']) 
plt.show()
In [29]:
print("DAYS_LAST_DUE_1ST_VERSION :" ,previous_application['DAYS_LAST_DUE_1ST_VERSION'].isnull().sum())
DAYS_LAST_DUE_1ST_VERSION : 673065
In [30]:
previous_application['DAYS_LAST_DUE_1ST_VERSION'].describe()
Out[30]:
count    997149.000000
mean     33767.774054 
std      106857.034789
min     -2801.000000  
25%     -1242.000000  
50%     -361.000000   
75%      129.000000   
max      365243.000000
Name: DAYS_LAST_DUE_1ST_VERSION, dtype: float64
In [31]:
sns.set_style('whitegrid') 
sns.boxplot(previous_application['DAYS_LAST_DUE_1ST_VERSION']) 
plt.show()
In [32]:
print("DAYS_LAST_DUE:" ,previous_application['DAYS_LAST_DUE'].isnull().sum())
DAYS_LAST_DUE: 673065
In [33]:
previous_application['DAYS_LAST_DUE'].describe()
Out[33]:
count    997149.000000
mean     76582.403064 
std      149647.415123
min     -2889.000000  
25%     -1314.000000  
50%     -537.000000   
75%     -74.000000    
max      365243.000000
Name: DAYS_LAST_DUE, dtype: float64
In [34]:
sns.set_style('whitegrid') 
sns.boxplot(previous_application['DAYS_LAST_DUE']) 
plt.show()
In [35]:
print("DAYS_TERMINATION :" ,previous_application['DAYS_TERMINATION'].isnull().sum())
DAYS_TERMINATION : 673065
In [36]:
previous_application['DAYS_TERMINATION'].describe()
Out[36]:
count    997149.000000
mean     81992.343838 
std      153303.516729
min     -2874.000000  
25%     -1270.000000  
50%     -499.000000   
75%     -44.000000    
max      365243.000000
Name: DAYS_TERMINATION, dtype: float64
In [37]:
sns.set_style('whitegrid') 
sns.boxplot(previous_application['DAYS_TERMINATION']) 
plt.show()
In [38]:
print("NFLAG_INSURED_ON_APPROVAL:" ,previous_application['NFLAG_INSURED_ON_APPROVAL'].isnull().sum())
NFLAG_INSURED_ON_APPROVAL: 673065
In [39]:
previous_application['NFLAG_INSURED_ON_APPROVAL'].value_counts()
Out[39]:
0.0    665527
1.0    331622
Name: NFLAG_INSURED_ON_APPROVAL, dtype: int64
In [40]:
previous_application.isnull().sum()
Out[40]:
SK_ID_PREV                     0     
SK_ID_CURR                     0     
NAME_CONTRACT_TYPE             0     
AMT_ANNUITY                    372235
AMT_APPLICATION                0     
AMT_CREDIT                     1     
AMT_GOODS_PRICE                385515
WEEKDAY_APPR_PROCESS_START     0     
HOUR_APPR_PROCESS_START        0     
FLAG_LAST_APPL_PER_CONTRACT    0     
NFLAG_LAST_APPL_IN_DAY         0     
NAME_CASH_LOAN_PURPOSE         0     
NAME_CONTRACT_STATUS           0     
DAYS_DECISION                  0     
NAME_PAYMENT_TYPE              0     
CODE_REJECT_REASON             0     
NAME_TYPE_SUITE                820405
NAME_CLIENT_TYPE               0     
NAME_GOODS_CATEGORY            0     
NAME_PORTFOLIO                 0     
NAME_PRODUCT_TYPE              0     
CHANNEL_TYPE                   0     
SELLERPLACE_AREA               0     
NAME_SELLER_INDUSTRY           0     
CNT_PAYMENT                    372230
NAME_YIELD_GROUP               0     
PRODUCT_COMBINATION            346   
DAYS_FIRST_DRAWING             673065
DAYS_FIRST_DUE                 673065
DAYS_LAST_DUE_1ST_VERSION      673065
DAYS_LAST_DUE                  673065
DAYS_TERMINATION               673065
NFLAG_INSURED_ON_APPROVAL      673065
dtype: int64
In [41]:
print("AMT_CREDIT :" ,previous_application['AMT_CREDIT'].isnull().sum())
AMT_CREDIT : 1
In [42]:
previous_application['AMT_CREDIT'].describe()
Out[42]:
count    1.670213e+06
mean     1.961140e+05
std      3.185746e+05
min      0.000000e+00
25%      2.416050e+04
50%      8.054100e+04
75%      2.164185e+05
max      6.905160e+06
Name: AMT_CREDIT, dtype: float64
In [43]:
sns.set_style('whitegrid') 
sns.boxplot(previous_application['AMT_CREDIT']) 
plt.show()
In [44]:
print("PRODUCT_COMBINATION :" ,previous_application['PRODUCT_COMBINATION'].isnull().sum())
PRODUCT_COMBINATION : 346
In [45]:
previous_application['PRODUCT_COMBINATION'].value_counts()
Out[45]:
Cash                              285990
POS household with interest       263622
POS mobile with interest          220670
Cash X-Sell: middle               143883
Cash X-Sell: low                  130248
Card Street                       112582
POS industry with interest        98833 
POS household without interest    82908 
Card X-Sell                       80582 
Cash Street: high                 59639 
Cash X-Sell: high                 59301 
Cash Street: middle               34658 
Cash Street: low                  33834 
POS mobile without interest       24082 
POS other with interest           23879 
POS industry without interest     12602 
POS others without interest       2555  
Name: PRODUCT_COMBINATION, dtype: int64
In [46]:
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'
In [47]:
obj_dtypes = [i for i in previous_application.select_dtypes(include=object).columns if i not in ["type"] ]
num_dtypes = [i for i in previous_application.select_dtypes(include = np.number).columns if i not in ['SK_ID_CURR'] + [ 'TARGET']]
In [48]:
print(color.BOLD + color.PURPLE + 'Categorical Columns' + color.END, "\n")
for x in range(len(obj_dtypes)): 
    print(obj_dtypes[x])
Categorical Columns 

NAME_CONTRACT_TYPE
WEEKDAY_APPR_PROCESS_START
FLAG_LAST_APPL_PER_CONTRACT
NAME_CASH_LOAN_PURPOSE
NAME_CONTRACT_STATUS
NAME_PAYMENT_TYPE
CODE_REJECT_REASON
NAME_TYPE_SUITE
NAME_CLIENT_TYPE
NAME_GOODS_CATEGORY
NAME_PORTFOLIO
NAME_PRODUCT_TYPE
CHANNEL_TYPE
NAME_SELLER_INDUSTRY
NAME_YIELD_GROUP
PRODUCT_COMBINATION
In [49]:
print(color.BOLD + color.PURPLE + 'Numerical' + color.END, "\n")
for x in range(len(obj_dtypes)): 
    print(obj_dtypes[x])
Numerical 

NAME_CONTRACT_TYPE
WEEKDAY_APPR_PROCESS_START
FLAG_LAST_APPL_PER_CONTRACT
NAME_CASH_LOAN_PURPOSE
NAME_CONTRACT_STATUS
NAME_PAYMENT_TYPE
CODE_REJECT_REASON
NAME_TYPE_SUITE
NAME_CLIENT_TYPE
NAME_GOODS_CATEGORY
NAME_PORTFOLIO
NAME_PRODUCT_TYPE
CHANNEL_TYPE
NAME_SELLER_INDUSTRY
NAME_YIELD_GROUP
PRODUCT_COMBINATION
In [50]:
fig = plt.figure(figsize=(18,6))
miss_application_data = pd.DataFrame((application_data.isnull().sum())*100/application_data.shape[0]).reset_index()
miss_application_data["type"] = "application_data"
ax = sns.pointplot("index",0,data=miss_application_data,hue="type")
plt.xticks(rotation =90,fontsize =7)
plt.title("Percentage of Missing values in application_data")
plt.ylabel("PERCENTAGE")
plt.xlabel("COLUMNS")
ax.set_facecolor("k")
fig.set_facecolor("lightgrey")
In [52]:
round(100*(application_data.isnull().sum()/len(application_data.index)),2)
Out[52]:
SK_ID_CURR                      0.00 
TARGET                          0.00 
NAME_CONTRACT_TYPE              0.00 
CODE_GENDER                     0.00 
FLAG_OWN_CAR                    0.00 
FLAG_OWN_REALTY                 0.00 
CNT_CHILDREN                    0.00 
AMT_INCOME_TOTAL                0.00 
AMT_CREDIT                      0.00 
AMT_ANNUITY                     0.00 
AMT_GOODS_PRICE                 0.09 
NAME_TYPE_SUITE                 0.42 
NAME_INCOME_TYPE                0.00 
NAME_EDUCATION_TYPE             0.00 
NAME_FAMILY_STATUS              0.00 
NAME_HOUSING_TYPE               0.00 
REGION_POPULATION_RELATIVE      0.00 
DAYS_BIRTH                      0.00 
DAYS_EMPLOYED                   0.00 
DAYS_REGISTRATION               0.00 
DAYS_ID_PUBLISH                 0.00 
OWN_CAR_AGE                     65.99
FLAG_MOBIL                      0.00 
FLAG_EMP_PHONE                  0.00 
FLAG_WORK_PHONE                 0.00 
FLAG_CONT_MOBILE                0.00 
FLAG_PHONE                      0.00 
FLAG_EMAIL                      0.00 
OCCUPATION_TYPE                 31.35
CNT_FAM_MEMBERS                 0.00 
REGION_RATING_CLIENT            0.00 
REGION_RATING_CLIENT_W_CITY     0.00 
WEEKDAY_APPR_PROCESS_START      0.00 
HOUR_APPR_PROCESS_START         0.00 
REG_REGION_NOT_LIVE_REGION      0.00 
REG_REGION_NOT_WORK_REGION      0.00 
LIVE_REGION_NOT_WORK_REGION     0.00 
REG_CITY_NOT_LIVE_CITY          0.00 
REG_CITY_NOT_WORK_CITY          0.00 
LIVE_CITY_NOT_WORK_CITY         0.00 
ORGANIZATION_TYPE               0.00 
EXT_SOURCE_1                    56.38
EXT_SOURCE_2                    0.21 
EXT_SOURCE_3                    19.83
APARTMENTS_AVG                  50.75
BASEMENTAREA_AVG                58.52
YEARS_BEGINEXPLUATATION_AVG     48.78
YEARS_BUILD_AVG                 66.50
COMMONAREA_AVG                  69.87
ELEVATORS_AVG                   53.30
ENTRANCES_AVG                   50.35
FLOORSMAX_AVG                   49.76
FLOORSMIN_AVG                   67.85
LANDAREA_AVG                    59.38
LIVINGAPARTMENTS_AVG            68.35
LIVINGAREA_AVG                  50.19
NONLIVINGAPARTMENTS_AVG         69.43
NONLIVINGAREA_AVG               55.18
APARTMENTS_MODE                 50.75
BASEMENTAREA_MODE               58.52
YEARS_BEGINEXPLUATATION_MODE    48.78
YEARS_BUILD_MODE                66.50
COMMONAREA_MODE                 69.87
ELEVATORS_MODE                  53.30
ENTRANCES_MODE                  50.35
FLOORSMAX_MODE                  49.76
FLOORSMIN_MODE                  67.85
LANDAREA_MODE                   59.38
LIVINGAPARTMENTS_MODE           68.35
LIVINGAREA_MODE                 50.19
NONLIVINGAPARTMENTS_MODE        69.43
NONLIVINGAREA_MODE              55.18
APARTMENTS_MEDI                 50.75
BASEMENTAREA_MEDI               58.52
YEARS_BEGINEXPLUATATION_MEDI    48.78
YEARS_BUILD_MEDI                66.50
COMMONAREA_MEDI                 69.87
ELEVATORS_MEDI                  53.30
ENTRANCES_MEDI                  50.35
FLOORSMAX_MEDI                  49.76
FLOORSMIN_MEDI                  67.85
LANDAREA_MEDI                   59.38
LIVINGAPARTMENTS_MEDI           68.35
LIVINGAREA_MEDI                 50.19
NONLIVINGAPARTMENTS_MEDI        69.43
NONLIVINGAREA_MEDI              55.18
FONDKAPREMONT_MODE              68.39
HOUSETYPE_MODE                  50.18
TOTALAREA_MODE                  48.27
WALLSMATERIAL_MODE              50.84
EMERGENCYSTATE_MODE             47.40
OBS_30_CNT_SOCIAL_CIRCLE        0.33 
DEF_30_CNT_SOCIAL_CIRCLE        0.33 
OBS_60_CNT_SOCIAL_CIRCLE        0.33 
DEF_60_CNT_SOCIAL_CIRCLE        0.33 
DAYS_LAST_PHONE_CHANGE          0.00 
FLAG_DOCUMENT_2                 0.00 
FLAG_DOCUMENT_3                 0.00 
FLAG_DOCUMENT_4                 0.00 
FLAG_DOCUMENT_5                 0.00 
FLAG_DOCUMENT_6                 0.00 
FLAG_DOCUMENT_7                 0.00 
FLAG_DOCUMENT_8                 0.00 
FLAG_DOCUMENT_9                 0.00 
FLAG_DOCUMENT_10                0.00 
FLAG_DOCUMENT_11                0.00 
FLAG_DOCUMENT_12                0.00 
FLAG_DOCUMENT_13                0.00 
FLAG_DOCUMENT_14                0.00 
FLAG_DOCUMENT_15                0.00 
FLAG_DOCUMENT_16                0.00 
FLAG_DOCUMENT_17                0.00 
FLAG_DOCUMENT_18                0.00 
FLAG_DOCUMENT_19                0.00 
FLAG_DOCUMENT_20                0.00 
FLAG_DOCUMENT_21                0.00 
AMT_REQ_CREDIT_BUREAU_HOUR      13.50
AMT_REQ_CREDIT_BUREAU_DAY       13.50
AMT_REQ_CREDIT_BUREAU_WEEK      13.50
AMT_REQ_CREDIT_BUREAU_MON       13.50
AMT_REQ_CREDIT_BUREAU_QRT       13.50
AMT_REQ_CREDIT_BUREAU_YEAR      13.50
dtype: float64
In [ ]:
 
In [55]:
fig = plt.figure(figsize=(18,6))
miss_application_data = pd.DataFrame((application_data.isnull().sum())*100/application_data.shape[0]).reset_index()
miss_application_data["type"] = "application_data"
ax = sns.pointplot("index",0,data=miss_application_data,hue="type")
plt.xticks(rotation =90,fontsize =7)
plt.title("Percentage of Missing values in application_data")
plt.ylabel("PERCENTAGE")
plt.xlabel("COLUMNS")
ax.set_facecolor("k")
fig.set_facecolor("lightgrey")
In [56]:
round(100*(application_data.isnull().sum()/len(application_data.index)),2)
Out[56]:
SK_ID_CURR                     0.00 
TARGET                         0.00 
NAME_CONTRACT_TYPE             0.00 
CODE_GENDER                    0.00 
FLAG_OWN_CAR                   0.00 
FLAG_OWN_REALTY                0.00 
CNT_CHILDREN                   0.00 
AMT_INCOME_TOTAL               0.00 
AMT_CREDIT                     0.00 
AMT_ANNUITY                    0.00 
AMT_GOODS_PRICE                0.09 
NAME_TYPE_SUITE                0.42 
NAME_INCOME_TYPE               0.00 
NAME_EDUCATION_TYPE            0.00 
NAME_FAMILY_STATUS             0.00 
NAME_HOUSING_TYPE              0.00 
REGION_POPULATION_RELATIVE     0.00 
DAYS_BIRTH                     0.00 
DAYS_EMPLOYED                  0.00 
DAYS_REGISTRATION              0.00 
DAYS_ID_PUBLISH                0.00 
FLAG_MOBIL                     0.00 
FLAG_EMP_PHONE                 0.00 
FLAG_WORK_PHONE                0.00 
FLAG_CONT_MOBILE               0.00 
FLAG_PHONE                     0.00 
FLAG_EMAIL                     0.00 
CNT_FAM_MEMBERS                0.00 
REGION_RATING_CLIENT           0.00 
REGION_RATING_CLIENT_W_CITY    0.00 
WEEKDAY_APPR_PROCESS_START     0.00 
HOUR_APPR_PROCESS_START        0.00 
REG_REGION_NOT_LIVE_REGION     0.00 
REG_REGION_NOT_WORK_REGION     0.00 
LIVE_REGION_NOT_WORK_REGION    0.00 
REG_CITY_NOT_LIVE_CITY         0.00 
REG_CITY_NOT_WORK_CITY         0.00 
LIVE_CITY_NOT_WORK_CITY        0.00 
ORGANIZATION_TYPE              0.00 
OBS_30_CNT_SOCIAL_CIRCLE       0.33 
DEF_30_CNT_SOCIAL_CIRCLE       0.33 
OBS_60_CNT_SOCIAL_CIRCLE       0.33 
DEF_60_CNT_SOCIAL_CIRCLE       0.33 
DAYS_LAST_PHONE_CHANGE         0.00 
FLAG_DOCUMENT_2                0.00 
FLAG_DOCUMENT_3                0.00 
FLAG_DOCUMENT_4                0.00 
FLAG_DOCUMENT_5                0.00 
FLAG_DOCUMENT_6                0.00 
FLAG_DOCUMENT_7                0.00 
FLAG_DOCUMENT_8                0.00 
FLAG_DOCUMENT_9                0.00 
FLAG_DOCUMENT_10               0.00 
FLAG_DOCUMENT_11               0.00 
FLAG_DOCUMENT_12               0.00 
FLAG_DOCUMENT_13               0.00 
FLAG_DOCUMENT_14               0.00 
FLAG_DOCUMENT_15               0.00 
FLAG_DOCUMENT_16               0.00 
FLAG_DOCUMENT_17               0.00 
FLAG_DOCUMENT_18               0.00 
FLAG_DOCUMENT_19               0.00 
FLAG_DOCUMENT_20               0.00 
FLAG_DOCUMENT_21               0.00 
AMT_REQ_CREDIT_BUREAU_HOUR     13.50
AMT_REQ_CREDIT_BUREAU_DAY      13.50
AMT_REQ_CREDIT_BUREAU_WEEK     13.50
AMT_REQ_CREDIT_BUREAU_MON      13.50
AMT_REQ_CREDIT_BUREAU_QRT      13.50
AMT_REQ_CREDIT_BUREAU_YEAR     13.50
dtype: float64
In [57]:
print("AMT_REQ_CREDIT_BUREAU_DAY NAN COUNT :" ,application_data['AMT_REQ_CREDIT_BUREAU_DAY'].isnull().sum())
AMT_REQ_CREDIT_BUREAU_DAY NAN COUNT : 41519
In [58]:
application_data['AMT_REQ_CREDIT_BUREAU_DAY'].describe()
Out[58]:
count    265992.000000
mean     0.007000     
std      0.110757     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      0.000000     
max      9.000000     
Name: AMT_REQ_CREDIT_BUREAU_DAY, dtype: float64
In [59]:
print("AMT_REQ_CREDIT_BUREAU_HOUR NAN COUNT :" ,application_data['AMT_REQ_CREDIT_BUREAU_HOUR'].isnull().sum())
AMT_REQ_CREDIT_BUREAU_HOUR NAN COUNT : 41519
In [65]:
application_data['AMT_REQ_CREDIT_BUREAU_HOUR'].describe()
Out[65]:
count    265992.000000
mean     0.006402     
std      0.083849     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      0.000000     
max      4.000000     
Name: AMT_REQ_CREDIT_BUREAU_HOUR, dtype: float64
In [61]:
print("AMT_REQ_CREDIT_BUREAU_MON NAN COUNT :" ,application_data['AMT_REQ_CREDIT_BUREAU_MON'].isnull().sum())
AMT_REQ_CREDIT_BUREAU_MON NAN COUNT : 41519
In [67]:
application_data['AMT_REQ_CREDIT_BUREAU_MON'].describe()
Out[67]:
count    265992.000000
mean     0.267395     
std      0.916002     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      0.000000     
max      27.000000    
Name: AMT_REQ_CREDIT_BUREAU_MON, dtype: float64
In [63]:
print("AMT_REQ_CREDIT_BUREAU_QRT NAN COUNT :" ,application_data['AMT_REQ_CREDIT_BUREAU_QRT'].isnull().sum())
AMT_REQ_CREDIT_BUREAU_QRT NAN COUNT : 41519
In [64]:
print("AMT_REQ_CREDIT_BUREAU_WEEK NAN COUNT :" ,application_data['AMT_REQ_CREDIT_BUREAU_WEEK'].isnull().sum())
AMT_REQ_CREDIT_BUREAU_WEEK NAN COUNT : 41519
In [66]:
application_data['AMT_REQ_CREDIT_BUREAU_WEEK'].describe()
Out[66]:
count    265992.000000
mean     0.034362     
std      0.204685     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      0.000000     
max      8.000000     
Name: AMT_REQ_CREDIT_BUREAU_WEEK, dtype: float64
In [68]:
print("AMT_REQ_CREDIT_BUREAU_YEAR NAN COUNT :" ,application_data['AMT_REQ_CREDIT_BUREAU_YEAR'].isnull().sum())
AMT_REQ_CREDIT_BUREAU_YEAR NAN COUNT : 41519
In [69]:
application_data['AMT_REQ_CREDIT_BUREAU_YEAR'].describe()
Out[69]:
count    265992.000000
mean     1.899974     
std      1.869295     
min      0.000000     
25%      0.000000     
50%      1.000000     
75%      3.000000     
max      25.000000    
Name: AMT_REQ_CREDIT_BUREAU_YEAR, dtype: float64
In [70]:
print("DEF_30_CNT_SOCIAL_CIRCLE NAN COUNT :" ,application_data['DEF_30_CNT_SOCIAL_CIRCLE'].isnull().sum())
DEF_30_CNT_SOCIAL_CIRCLE NAN COUNT : 1021
In [71]:
application_data['DEF_30_CNT_SOCIAL_CIRCLE'].describe()
Out[71]:
count    306490.000000
mean     0.143421     
std      0.446698     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      0.000000     
max      34.000000    
Name: DEF_30_CNT_SOCIAL_CIRCLE, dtype: float64
In [72]:
print("DEF_30_CNT_SOCIAL_CIRCLE :" ,application_data['DEF_30_CNT_SOCIAL_CIRCLE'].isnull().sum())
DEF_30_CNT_SOCIAL_CIRCLE : 1021
In [73]:
application_data['DEF_30_CNT_SOCIAL_CIRCLE'].describe()
Out[73]:
count    306490.000000
mean     0.143421     
std      0.446698     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      0.000000     
max      34.000000    
Name: DEF_30_CNT_SOCIAL_CIRCLE, dtype: float64
In [74]:
print("OBS_60_CNT_SOCIAL_CIRCLE :" ,application_data['OBS_60_CNT_SOCIAL_CIRCLE'].isnull().sum())
OBS_60_CNT_SOCIAL_CIRCLE : 1021
In [75]:
application_data['OBS_60_CNT_SOCIAL_CIRCLE'].describe()
Out[75]:
count    306490.000000
mean     1.405292     
std      2.379803     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      2.000000     
max      344.000000   
Name: OBS_60_CNT_SOCIAL_CIRCLE, dtype: float64
In [76]:
print("DEF_60_CNT_SOCIAL_CIRCLE :" ,application_data['DEF_60_CNT_SOCIAL_CIRCLE'].isnull().sum())
DEF_60_CNT_SOCIAL_CIRCLE : 1021
In [77]:
application_data['DEF_60_CNT_SOCIAL_CIRCLE'].describe()
Out[77]:
count    306490.000000
mean     0.100049     
std      0.362291     
min      0.000000     
25%      0.000000     
50%      0.000000     
75%      0.000000     
max      24.000000    
Name: DEF_60_CNT_SOCIAL_CIRCLE, dtype: float64
In [78]:
application_data.isnull().sum()
Out[78]:
SK_ID_CURR                     0    
TARGET                         0    
NAME_CONTRACT_TYPE             0    
CODE_GENDER                    0    
FLAG_OWN_CAR                   0    
FLAG_OWN_REALTY                0    
CNT_CHILDREN                   0    
AMT_INCOME_TOTAL               0    
AMT_CREDIT                     0    
AMT_ANNUITY                    12   
AMT_GOODS_PRICE                278  
NAME_TYPE_SUITE                1292 
NAME_INCOME_TYPE               0    
NAME_EDUCATION_TYPE            0    
NAME_FAMILY_STATUS             0    
NAME_HOUSING_TYPE              0    
REGION_POPULATION_RELATIVE     0    
DAYS_BIRTH                     0    
DAYS_EMPLOYED                  0    
DAYS_REGISTRATION              0    
DAYS_ID_PUBLISH                0    
FLAG_MOBIL                     0    
FLAG_EMP_PHONE                 0    
FLAG_WORK_PHONE                0    
FLAG_CONT_MOBILE               0    
FLAG_PHONE                     0    
FLAG_EMAIL                     0    
CNT_FAM_MEMBERS                2    
REGION_RATING_CLIENT           0    
REGION_RATING_CLIENT_W_CITY    0    
WEEKDAY_APPR_PROCESS_START     0    
HOUR_APPR_PROCESS_START        0    
REG_REGION_NOT_LIVE_REGION     0    
REG_REGION_NOT_WORK_REGION     0    
LIVE_REGION_NOT_WORK_REGION    0    
REG_CITY_NOT_LIVE_CITY         0    
REG_CITY_NOT_WORK_CITY         0    
LIVE_CITY_NOT_WORK_CITY        0    
ORGANIZATION_TYPE              0    
OBS_30_CNT_SOCIAL_CIRCLE       1021 
DEF_30_CNT_SOCIAL_CIRCLE       1021 
OBS_60_CNT_SOCIAL_CIRCLE       1021 
DEF_60_CNT_SOCIAL_CIRCLE       1021 
DAYS_LAST_PHONE_CHANGE         1    
FLAG_DOCUMENT_2                0    
FLAG_DOCUMENT_3                0    
FLAG_DOCUMENT_4                0    
FLAG_DOCUMENT_5                0    
FLAG_DOCUMENT_6                0    
FLAG_DOCUMENT_7                0    
FLAG_DOCUMENT_8                0    
FLAG_DOCUMENT_9                0    
FLAG_DOCUMENT_10               0    
FLAG_DOCUMENT_11               0    
FLAG_DOCUMENT_12               0    
FLAG_DOCUMENT_13               0    
FLAG_DOCUMENT_14               0    
FLAG_DOCUMENT_15               0    
FLAG_DOCUMENT_16               0    
FLAG_DOCUMENT_17               0    
FLAG_DOCUMENT_18               0    
FLAG_DOCUMENT_19               0    
FLAG_DOCUMENT_20               0    
FLAG_DOCUMENT_21               0    
AMT_REQ_CREDIT_BUREAU_HOUR     41519
AMT_REQ_CREDIT_BUREAU_DAY      41519
AMT_REQ_CREDIT_BUREAU_WEEK     41519
AMT_REQ_CREDIT_BUREAU_MON      41519
AMT_REQ_CREDIT_BUREAU_QRT      41519
AMT_REQ_CREDIT_BUREAU_YEAR     41519
dtype: int64
In [79]:
print("AMT_ANNUITY  :" ,application_data['AMT_ANNUITY'].isnull().sum())
AMT_ANNUITY  : 12
In [80]:
application_data['AMT_ANNUITY'].describe()
Out[80]:
count    307499.000000
mean     27108.573909 
std      14493.737315 
min      1615.500000  
25%      16524.000000 
50%      24903.000000 
75%      34596.000000 
max      258025.500000
Name: AMT_ANNUITY, dtype: float64
In [81]:
sns.set_style('whitegrid') 
sns.distplot(application_data['AMT_ANNUITY']) 
plt.show()
In [82]:
print("AMT_GOODS_PRICE   :" ,application_data['AMT_GOODS_PRICE'].isnull().sum())
AMT_GOODS_PRICE   : 278
In [83]:
application_data['AMT_GOODS_PRICE'].describe()
Out[83]:
count    3.072330e+05
mean     5.383962e+05
std      3.694465e+05
min      4.050000e+04
25%      2.385000e+05
50%      4.500000e+05
75%      6.795000e+05
max      4.050000e+06
Name: AMT_GOODS_PRICE, dtype: float64
In [84]:
sns.set_style('whitegrid') 
sns.distplot(application_data['AMT_GOODS_PRICE']) 
plt.show()
In [85]:
print("NAME_TYPE_SUITE :" ,application_data['NAME_TYPE_SUITE'].isnull().sum())
NAME_TYPE_SUITE : 1292
In [86]:
application_data['NAME_TYPE_SUITE'].value_counts()
Out[86]:
Unaccompanied      248526
Family             40149 
Spouse, partner    11370 
Children           3267  
Other_B            1770  
Other_A            866   
Group of people    271   
Name: NAME_TYPE_SUITE, dtype: int64
In [87]:
print("CNT_FAM_MEMBERS :" ,application_data['CNT_FAM_MEMBERS'].isnull().sum())
CNT_FAM_MEMBERS : 2
In [88]:
application_data['CNT_FAM_MEMBERS'].describe()
Out[88]:
count    307509.000000
mean     2.152665     
std      0.910682     
min      1.000000     
25%      2.000000     
50%      2.000000     
75%      3.000000     
max      20.000000    
Name: CNT_FAM_MEMBERS, dtype: float64
In [89]:
sns.set_style('whitegrid') 
sns.distplot(application_data['CNT_FAM_MEMBERS']) 
plt.show()
In [90]:
print("DAYS_LAST_PHONE_CHANGE :" ,application_data['DAYS_LAST_PHONE_CHANGE'].isnull().sum())
DAYS_LAST_PHONE_CHANGE : 1
In [91]:
application_data['DAYS_LAST_PHONE_CHANGE'].describe()
Out[91]:
count    307510.000000
mean    -962.858788   
std      826.808487   
min     -4292.000000  
25%     -1570.000000  
50%     -757.000000   
75%     -274.000000   
max      0.000000     
Name: DAYS_LAST_PHONE_CHANGE, dtype: float64
In [92]:
import statistics 
statistics.mode(application_data['DAYS_LAST_PHONE_CHANGE'])
Out[92]:
0.0
In [93]:
print(type(application_data.info()))
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 307511 entries, 0 to 307510
Data columns (total 70 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   SK_ID_CURR                   307511 non-null  int64  
 1   TARGET                       307511 non-null  int64  
 2   NAME_CONTRACT_TYPE           307511 non-null  object 
 3   CODE_GENDER                  307511 non-null  object 
 4   FLAG_OWN_CAR                 307511 non-null  object 
 5   FLAG_OWN_REALTY              307511 non-null  object 
 6   CNT_CHILDREN                 307511 non-null  int64  
 7   AMT_INCOME_TOTAL             307511 non-null  float64
 8   AMT_CREDIT                   307511 non-null  float64
 9   AMT_ANNUITY                  307499 non-null  float64
 10  AMT_GOODS_PRICE              307233 non-null  float64
 11  NAME_TYPE_SUITE              306219 non-null  object 
 12  NAME_INCOME_TYPE             307511 non-null  object 
 13  NAME_EDUCATION_TYPE          307511 non-null  object 
 14  NAME_FAMILY_STATUS           307511 non-null  object 
 15  NAME_HOUSING_TYPE            307511 non-null  object 
 16  REGION_POPULATION_RELATIVE   307511 non-null  float64
 17  DAYS_BIRTH                   307511 non-null  int64  
 18  DAYS_EMPLOYED                307511 non-null  int64  
 19  DAYS_REGISTRATION            307511 non-null  float64
 20  DAYS_ID_PUBLISH              307511 non-null  int64  
 21  FLAG_MOBIL                   307511 non-null  int64  
 22  FLAG_EMP_PHONE               307511 non-null  int64  
 23  FLAG_WORK_PHONE              307511 non-null  int64  
 24  FLAG_CONT_MOBILE             307511 non-null  int64  
 25  FLAG_PHONE                   307511 non-null  int64  
 26  FLAG_EMAIL                   307511 non-null  int64  
 27  CNT_FAM_MEMBERS              307509 non-null  float64
 28  REGION_RATING_CLIENT         307511 non-null  int64  
 29  REGION_RATING_CLIENT_W_CITY  307511 non-null  int64  
 30  WEEKDAY_APPR_PROCESS_START   307511 non-null  object 
 31  HOUR_APPR_PROCESS_START      307511 non-null  int64  
 32  REG_REGION_NOT_LIVE_REGION   307511 non-null  int64  
 33  REG_REGION_NOT_WORK_REGION   307511 non-null  int64  
 34  LIVE_REGION_NOT_WORK_REGION  307511 non-null  int64  
 35  REG_CITY_NOT_LIVE_CITY       307511 non-null  int64  
 36  REG_CITY_NOT_WORK_CITY       307511 non-null  int64  
 37  LIVE_CITY_NOT_WORK_CITY      307511 non-null  int64  
 38  ORGANIZATION_TYPE            307511 non-null  object 
 39  OBS_30_CNT_SOCIAL_CIRCLE     306490 non-null  float64
 40  DEF_30_CNT_SOCIAL_CIRCLE     306490 non-null  float64
 41  OBS_60_CNT_SOCIAL_CIRCLE     306490 non-null  float64
 42  DEF_60_CNT_SOCIAL_CIRCLE     306490 non-null  float64
 43  DAYS_LAST_PHONE_CHANGE       307510 non-null  float64
 44  FLAG_DOCUMENT_2              307511 non-null  int64  
 45  FLAG_DOCUMENT_3              307511 non-null  int64  
 46  FLAG_DOCUMENT_4              307511 non-null  int64  
 47  FLAG_DOCUMENT_5              307511 non-null  int64  
 48  FLAG_DOCUMENT_6              307511 non-null  int64  
 49  FLAG_DOCUMENT_7              307511 non-null  int64  
 50  FLAG_DOCUMENT_8              307511 non-null  int64  
 51  FLAG_DOCUMENT_9              307511 non-null  int64  
 52  FLAG_DOCUMENT_10             307511 non-null  int64  
 53  FLAG_DOCUMENT_11             307511 non-null  int64  
 54  FLAG_DOCUMENT_12             307511 non-null  int64  
 55  FLAG_DOCUMENT_13             307511 non-null  int64  
 56  FLAG_DOCUMENT_14             307511 non-null  int64  
 57  FLAG_DOCUMENT_15             307511 non-null  int64  
 58  FLAG_DOCUMENT_16             307511 non-null  int64  
 59  FLAG_DOCUMENT_17             307511 non-null  int64  
 60  FLAG_DOCUMENT_18             307511 non-null  int64  
 61  FLAG_DOCUMENT_19             307511 non-null  int64  
 62  FLAG_DOCUMENT_20             307511 non-null  int64  
 63  FLAG_DOCUMENT_21             307511 non-null  int64  
 64  AMT_REQ_CREDIT_BUREAU_HOUR   265992 non-null  float64
 65  AMT_REQ_CREDIT_BUREAU_DAY    265992 non-null  float64
 66  AMT_REQ_CREDIT_BUREAU_WEEK   265992 non-null  float64
 67  AMT_REQ_CREDIT_BUREAU_MON    265992 non-null  float64
 68  AMT_REQ_CREDIT_BUREAU_QRT    265992 non-null  float64
 69  AMT_REQ_CREDIT_BUREAU_YEAR   265992 non-null  float64
dtypes: float64(18), int64(41), object(11)
memory usage: 164.2+ MB
<class 'NoneType'>
In [94]:
application_data['DAYS_BIRTH'] = abs(application_data['DAYS_BIRTH'])
application_data['DAYS_ID_PUBLISH'] = abs(application_data['DAYS_ID_PUBLISH'])
application_data['DAYS_ID_PUBLISH'] = abs(application_data['DAYS_ID_PUBLISH'])
application_data['DAYS_LAST_PHONE_CHANGE'] = abs(application_data['DAYS_LAST_PHONE_CHANGE'])
In [95]:
display("application_data")
display(application_data.head())
'application_data'
SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR
0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0
1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
2 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
3 100006 0 Cash loans F N Y 0 135000.0 312682.5 29686.5 297000.0 Unaccompanied Working Secondary / secondary special Civil marriage House / apartment 0.008019 19005 -3039 -9833.0 2437 1 1 0 1 0 0 2.0 2 2 WEDNESDAY 17 0 0 0 0 0 0 Business Entity Type 3 2.0 0.0 2.0 0.0 617.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN
4 100007 0 Cash loans M N Y 0 121500.0 513000.0 21865.5 513000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.028663 19932 -3038 -4311.0 3458 1 1 0 1 0 0 1.0 2 2 THURSDAY 11 0 0 0 0 1 1 Religion 0.0 0.0 0.0 0.0 1106.0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
In [96]:
obj_dtypes = [i for i in application_data.select_dtypes(include=object).columns if i not in ["type"] ]
num_dtypes = [i for i in application_data.select_dtypes(include = np.number).columns if i not in ['SK_ID_CURR'] + [ 'TARGET']]
In [97]:
print(color.BOLD + color.PURPLE + 'Categorical Columns' + color.END, "\n")
for x in range(len(obj_dtypes)): 
    print(obj_dtypes[x])
Categorical Columns 

NAME_CONTRACT_TYPE
CODE_GENDER
FLAG_OWN_CAR
FLAG_OWN_REALTY
NAME_TYPE_SUITE
NAME_INCOME_TYPE
NAME_EDUCATION_TYPE
NAME_FAMILY_STATUS
NAME_HOUSING_TYPE
WEEKDAY_APPR_PROCESS_START
ORGANIZATION_TYPE
In [98]:
print(color.BOLD + color.PURPLE +"Numerical Columns" + color.END, "\n")
for x in range(len(num_dtypes)): 
    print(num_dtypes[x])
Numerical Columns 

CNT_CHILDREN
AMT_INCOME_TOTAL
AMT_CREDIT
AMT_ANNUITY
AMT_GOODS_PRICE
REGION_POPULATION_RELATIVE
DAYS_BIRTH
DAYS_EMPLOYED
DAYS_REGISTRATION
DAYS_ID_PUBLISH
FLAG_MOBIL
FLAG_EMP_PHONE
FLAG_WORK_PHONE
FLAG_CONT_MOBILE
FLAG_PHONE
FLAG_EMAIL
CNT_FAM_MEMBERS
REGION_RATING_CLIENT
REGION_RATING_CLIENT_W_CITY
HOUR_APPR_PROCESS_START
REG_REGION_NOT_LIVE_REGION
REG_REGION_NOT_WORK_REGION
LIVE_REGION_NOT_WORK_REGION
REG_CITY_NOT_LIVE_CITY
REG_CITY_NOT_WORK_CITY
LIVE_CITY_NOT_WORK_CITY
OBS_30_CNT_SOCIAL_CIRCLE
DEF_30_CNT_SOCIAL_CIRCLE
OBS_60_CNT_SOCIAL_CIRCLE
DEF_60_CNT_SOCIAL_CIRCLE
DAYS_LAST_PHONE_CHANGE
FLAG_DOCUMENT_2
FLAG_DOCUMENT_3
FLAG_DOCUMENT_4
FLAG_DOCUMENT_5
FLAG_DOCUMENT_6
FLAG_DOCUMENT_7
FLAG_DOCUMENT_8
FLAG_DOCUMENT_9
FLAG_DOCUMENT_10
FLAG_DOCUMENT_11
FLAG_DOCUMENT_12
FLAG_DOCUMENT_13
FLAG_DOCUMENT_14
FLAG_DOCUMENT_15
FLAG_DOCUMENT_16
FLAG_DOCUMENT_17
FLAG_DOCUMENT_18
FLAG_DOCUMENT_19
FLAG_DOCUMENT_20
FLAG_DOCUMENT_21
AMT_REQ_CREDIT_BUREAU_HOUR
AMT_REQ_CREDIT_BUREAU_DAY
AMT_REQ_CREDIT_BUREAU_WEEK
AMT_REQ_CREDIT_BUREAU_MON
AMT_REQ_CREDIT_BUREAU_QRT
AMT_REQ_CREDIT_BUREAU_YEAR
In [99]:
fig = plt.figure(figsize=(13,6))
plt.subplot(121)
application_data["CODE_GENDER"].value_counts().plot.pie(autopct = "%1.0f%%",colors = ["red","yellow"],startangle = 60,
                                                                        wedgeprops={"linewidth":2,"edgecolor":"k"},explode=[.05,0,0],shadow =True)
plt.title("Distribution of gender")
plt.show()
In [100]:
plt.figure(figsize=(14,7))
plt.subplot(121)
application_data["TARGET"].value_counts().plot.pie(autopct = "%1.0f%%",colors = sns.color_palette("prism",7),startangle = 60,labels=["repayer","defaulter"],
                                                                        wedgeprops={"linewidth":2,"edgecolor":"k"},explode=[.1,0],shadow =True)
plt.title("Distribution of target variable")

plt.subplot(122)
ax = application_data["TARGET"].value_counts().plot(kind="barh")

for i,j in enumerate(application_data["TARGET"].value_counts().values):
    ax.text(.7,i,j,weight = "bold",fontsize=20)

plt.title("Count of target variable")
plt.show()
In [101]:
application_data_x = application_data[[x for x in application_data.columns if x not in ["TARGET"]]]
previous_application_x = previous_application[[x for x in previous_application.columns if x not in ["TARGET"]]]
application_data_x["type"] = "application_data"
previous_application_x["type"] = "previous_application"
data = pd.concat([application_data_x,previous_application_x],axis=0) 
In [102]:
plt.figure(figsize=(14,7))
plt.subplot(121)
data[data["type"] == "application_data"]["NAME_CONTRACT_TYPE"].value_counts().plot.pie(autopct = "%1.0f%%",colors = ["orange","red"],startangle = 60,
                                                                        wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.title("distribution of contract types in application_data")

plt.subplot(122)
data[data["type"] == "previous_application"]["NAME_CONTRACT_TYPE"].value_counts().plot.pie(autopct = "%1.2f%%",colors = ["red","yellow","green",'BLACK'],startangle = 60,
                                                                        wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.ylabel("")
plt.title("distribution of contract types in previous_application")
plt.show()

plt.show()
In [103]:
fig = plt.figure(figsize=(13,6))
plt.subplot(121)
data[data["type"] == "application_data"]["CODE_GENDER"].value_counts().plot.pie(autopct = "%1.0f%%",colors = ["red","yellow"],startangle = 60,
                                                                        wedgeprops={"linewidth":2,"edgecolor":"k"},explode=[.05,0,0],shadow =True)
plt.title("distribution of gender in application_data")
plt.show()
In [104]:
fig  = plt.figure(figsize=(13,6))
plt.subplot(121)
ax = sns.countplot("NAME_CONTRACT_TYPE",hue="CODE_GENDER",data=data[data["type"] == "application_data"],palette=["r","b","g"])
ax.set_facecolor("lightgrey")
ax.set_title("Distribution of Contract type by gender -application_data")


plt.show()
In [105]:
fig = plt.figure(figsize=(13,6))

plt.subplot(121)
data["FLAG_OWN_CAR"].value_counts().plot.pie(autopct = "%1.0f%%",colors = ["gold","orangered"],startangle = 60,
                                                                        wedgeprops={"linewidth":2,"edgecolor":"k"},explode=[.05,0],shadow =True)
plt.title("distribution of client owning a car")

plt.subplot(122)
data[data["FLAG_OWN_CAR"] == "Y"]["CODE_GENDER"].value_counts().plot.pie(autopct = "%1.0f%%",colors = ["b","orangered"],startangle = 90,
                                                                        wedgeprops={"linewidth":2,"edgecolor":"k"},explode=[.05,0,0],shadow =True)
plt.title("distribution of client owning a car by gender")

plt.show()
In [106]:
plt.figure(figsize=(13,6))
plt.subplot(121)
data["FLAG_OWN_REALTY"].value_counts().plot.pie(autopct = "%1.0f%%",colors = ["skyblue","gold"],startangle = 90,
                                              wedgeprops={"linewidth":2,"edgecolor":"k"},explode=[0.05,0],shadow =True)
plt.title("Distribution of client owns a house or flat")

plt.subplot(122)
data[data["FLAG_OWN_REALTY"] == "Y"]["CODE_GENDER"].value_counts().plot.pie(autopct = "%1.0f%%",colors = ["orangered","b"],startangle = 90,
                                                                        wedgeprops={"linewidth":2,"edgecolor":"k"},explode=[.05,0,0],shadow =True)
plt.title("Distribution of client owning a house or flat by gender")
plt.show()
In [107]:
fig = plt.figure(figsize=(12,10))
plt.subplot(211)
sns.countplot(application_data["CNT_CHILDREN"],palette="Set1",hue=application_data["TARGET"])
plt.legend(loc="upper center")
plt.title(" Distribution of Number of children client has  by repayment status")
plt.subplot(212)
sns.countplot(application_data["CNT_FAM_MEMBERS"],palette="Set1",hue=application_data["TARGET"])
plt.legend(loc="upper center")
plt.title(" Distribution of Number of family members client has  by repayment status")
fig.set_facecolor("lightblue")
In [108]:
default = application_data[application_data["TARGET"]==1][[ 'NAME_CONTRACT_TYPE', 'CODE_GENDER','FLAG_OWN_CAR', 'FLAG_OWN_REALTY']]
non_default = application_data[application_data["TARGET"]==0][[ 'NAME_CONTRACT_TYPE', 'CODE_GENDER','FLAG_OWN_CAR', 'FLAG_OWN_REALTY']]

d_cols = ['NAME_CONTRACT_TYPE', 'CODE_GENDER','FLAG_OWN_CAR', 'FLAG_OWN_REALTY']
d_length = len(d_cols)

fig = plt.figure(figsize=(16,4))
for i,j in itertools.zip_longest(d_cols,range(d_length)):
    plt.subplot(1,4,j+1)
    default[i].value_counts().plot.pie(autopct = "%1.0f%%",colors = sns.color_palette("prism"),startangle = 90,
                                        wedgeprops={"linewidth":1,"edgecolor":"white"},shadow =True)
    circ = plt.Circle((0,0),.7,color="white")
    plt.gca().add_artist(circ)
    plt.ylabel("")
    plt.title(i+"-Defaulter")


fig = plt.figure(figsize=(16,4))
for i,j in itertools.zip_longest(d_cols,range(d_length)):
    plt.subplot(1,4,j+1)
    non_default[i].value_counts().plot.pie(autopct = "%1.0f%%",colors = sns.color_palette("prism",3),startangle = 90,
                                           wedgeprops={"linewidth":1,"edgecolor":"white"},shadow =True)
    circ = plt.Circle((0,0),.7,color="white")
    plt.gca().add_artist(circ)
    plt.ylabel("")
    plt.title(i+"-Repayer")
In [109]:
cols = [ 'AMT_INCOME_TOTAL', 'AMT_CREDIT','AMT_ANNUITY', 'AMT_GOODS_PRICE']
length = len(cols)
cs = ["r","b","g","k"]

ax = plt.figure(figsize=(18,18))
ax.set_facecolor("lightgrey")
for i,j,k in itertools.zip_longest(cols,range(length),cs):
    plt.subplot(2,2,j+1)
    sns.distplot(data[data[i].notnull()][i],color=k)
    plt.axvline(data[i].mean(),label = "mean",linestyle="dashed",color="k")
    plt.legend(loc="best")
    plt.title(i)
    plt.subplots_adjust(hspace = .2)
In [110]:
df = application_data.groupby("TARGET")[cols].describe().transpose().reset_index()
df = df[df["level_1"].isin([ 'mean', 'std', 'min', 'max'])] 
df_x = df[["level_0","level_1",0]]
df_y = df[["level_0","level_1",1]]
df_x = df_x.rename(columns={'level_0':"amount_type", 'level_1':"statistic", 0:"amount"})
df_x["type"] = "REPAYER"
df_y = df_y.rename(columns={'level_0':"amount_type", 'level_1':"statistic", 1:"amount"})
df_y["type"] = "DEFAULTER"
df_new = pd.concat([df_x,df_y],axis = 0)

stat = df_new["statistic"].unique().tolist()
length = len(stat)

plt.figure(figsize=(13,15))

for i,j in itertools.zip_longest(stat,range(length)):
    plt.subplot(2,2,j+1)
    fig = sns.barplot(df_new[df_new["statistic"] == i]["amount_type"],df_new[df_new["statistic"] == i]["amount"],
                hue=df_new[df_new["statistic"] == i]["type"],palette=["g","r"])
    plt.title(i + "--Defaulters vs Non defaulters")
    plt.subplots_adjust(hspace = .4)
    fig.set_facecolor("lightgrey")
In [111]:
cols = [ 'AMT_INCOME_TOTAL', 'AMT_CREDIT','AMT_ANNUITY', 'AMT_GOODS_PRICE']

df1 = data.groupby("CODE_GENDER")[cols].mean().transpose().reset_index()

df_f   = df1[["index","F"]]
df_f   = df_f.rename(columns={'index':"amt_type", 'F':"amount"})
df_f["gender"] = "FEMALE"
df_m   = df1[["index","M"]]
df_m   = df_m.rename(columns={'index':"amt_type", 'M':"amount"})
df_m["gender"] = "MALE"
df_xna = df1[["index","XNA"]]
df_xna = df_xna.rename(columns={'index':"amt_type", 'XNA':"amount"})
df_xna["gender"] = "XNA"

df_gen = pd.concat([df_m,df_f,df_xna],axis=0)

plt.figure(figsize=(12,5))
ax = sns.barplot("amt_type","amount",data=df_gen,hue="gender",palette="Set1")
plt.title("Average Income,credit,annuity & goods_price by gender")
plt.show()
In [112]:
fig = plt.figure(figsize=(10,8))
plt.scatter(application_data[application_data["TARGET"]==0]['AMT_ANNUITY'],application_data[application_data["TARGET"]==0]['AMT_CREDIT'],s=35,
            color="b",alpha=.5,label="REPAYER",linewidth=.5,edgecolor="k")
plt.scatter(application_data[application_data["TARGET"]==1]['AMT_ANNUITY'],application_data[application_data["TARGET"]==1]['AMT_CREDIT'],s=35,
            color="r",alpha=.2,label="DEFAULTER",linewidth=.5,edgecolor="k")
plt.legend(loc="best",prop={"size":15})
plt.xlabel("AMT_ANNUITY")
plt.ylabel("AMT_CREDIT")
plt.title("Scatter plot between credit amount and annuity amount")
plt.show()
In [113]:
amt = application_data[[ 'AMT_INCOME_TOTAL','AMT_CREDIT',
                         'AMT_ANNUITY', 'AMT_GOODS_PRICE',"TARGET"]]
amt = amt[(amt["AMT_GOODS_PRICE"].notnull()) & (amt["AMT_ANNUITY"].notnull())]
sns.pairplot(amt,hue="TARGET",palette=["b","r"])
plt.show()
In [114]:
plt.figure(figsize=(18,12))
plt.subplot(121)
sns.countplot(y=data["NAME_TYPE_SUITE"],
              palette="Set2",
              order=data["NAME_TYPE_SUITE"].value_counts().index[:5])
plt.title("Distribution of Suite type")

plt.subplot(122)
sns.countplot(y=data["NAME_TYPE_SUITE"],
              hue=data["CODE_GENDER"],palette="Set2",
              order=data["NAME_TYPE_SUITE"].value_counts().index[:5])
plt.ylabel("")
plt.title("Distribution of Suite type by gender")
plt.subplots_adjust(wspace = .4)
In [115]:
plt.figure(figsize=(18,12))
plt.subplot(121)
sns.countplot(y=data["NAME_INCOME_TYPE"],
              palette="Set2",
              order=data["NAME_INCOME_TYPE"].value_counts().index[:4])
plt.title("Distribution of client income type")

plt.subplot(122)
sns.countplot(y=data["NAME_INCOME_TYPE"],
              hue=data["CODE_GENDER"],
              palette="Set2",
              order=data["NAME_INCOME_TYPE"].value_counts().index[:4])
plt.ylabel("")
plt.title("Distribution of client income  type by gender")
plt.subplots_adjust(wspace = .4)
In [116]:
plt.figure(figsize=(25,25))
plt.subplot(121)
application_data[application_data["TARGET"]==0]["NAME_EDUCATION_TYPE"].value_counts().plot.pie(fontsize=12,autopct = "%1.0f%%",
                                                                                                 colors = sns.color_palette("Set1"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.title("Distribution of Education type for Repayers",color="b")

plt.subplot(122)
application_data[application_data["TARGET"]==1]["NAME_EDUCATION_TYPE"].value_counts().plot.pie(fontsize=12,autopct = "%1.0f%%",
                                                                                                 colors = sns.color_palette("Set1"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.title("Distribution of Education type for Defaulters",color="b")
plt.ylabel("")
plt.show()
In [117]:
edu = data.groupby(['NAME_EDUCATION_TYPE','NAME_INCOME_TYPE'])['AMT_INCOME_TOTAL'].mean().reset_index().sort_values(by='AMT_INCOME_TOTAL',ascending=False)
fig = plt.figure(figsize=(13,7))
ax = sns.barplot('NAME_INCOME_TYPE','AMT_INCOME_TOTAL',data=edu,hue='NAME_EDUCATION_TYPE',palette="seismic")
ax.set_facecolor("k")
plt.title(" Average Earnings by different professions and education types")
plt.show()
In [118]:
plt.figure(figsize=(16,8))
plt.subplot(121)
application_data[application_data["TARGET"]==0]["NAME_FAMILY_STATUS"].value_counts().plot.pie(autopct = "%1.0f%%",
                                                             startangle=120,colors = sns.color_palette("Set2",7),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True,explode=[0,.07,0,0,0,0])

plt.title("Distribution of Family status for Repayers",color="b")

plt.subplot(122)
application_data[application_data["TARGET"]==1]["NAME_FAMILY_STATUS"].value_counts().plot.pie(autopct = "%1.0f%%",
                                                    startangle=120,colors = sns.color_palette("Set2",7),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True,explode=[0,.07,0,0,0])


plt.title("Distribution of Family status for Defaulters",color="b")
plt.ylabel("")
plt.show()
In [119]:
plt.figure(figsize=(20,20))
plt.subplot(121)
application_data[application_data["TARGET"]==0]["NAME_HOUSING_TYPE"].value_counts().plot.pie(autopct = "%1.0f%%",fontsize=10,
                                                             colors = sns.color_palette("Spectral"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)

plt.title("Distribution of housing type  for Repayer",color="b")

plt.subplot(122)
application_data[application_data["TARGET"]==1]["NAME_HOUSING_TYPE"].value_counts().plot.pie(autopct = "%1.0f%%",fontsize=10,
                                                    colors = sns.color_palette("Spectral"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)


plt.title("Distribution of housing type for Defaulters",color="b")
plt.ylabel("")
plt.show()
In [120]:
fig = plt.figure(figsize=(13,8))

plt.subplot(121)
sns.violinplot(y=application_data[application_data["TARGET"]==0]["REGION_POPULATION_RELATIVE"]
               ,x=application_data[application_data["TARGET"]==0]["NAME_CONTRACT_TYPE"],
               palette="Set1")
plt.title("Distribution of region population for Non Default loans",color="b")
plt.subplot(122)
sns.violinplot(y = application_data[application_data["TARGET"]==1]["REGION_POPULATION_RELATIVE"]
               ,x=application_data[application_data["TARGET"]==1]["NAME_CONTRACT_TYPE"]
               ,palette="Set1")
plt.title("Distribution of region population  for  Default loans",color="b")

plt.subplots_adjust(wspace = .2)
fig.set_facecolor("lightgrey")
In [122]:
import matplotlib.pyplot as plt
import seaborn as sns

fig = plt.figure(figsize=(13, 15))

plt.subplot(221)
sns.distplot(application_data[application_data["TARGET"] == 0]["DAYS_BIRTH"], color="b")
plt.title("Age Distribution of repayers")

plt.subplot(222)
sns.distplot(application_data[application_data["TARGET"] == 1]["DAYS_BIRTH"], color="r")
plt.title("Age Distribution of defaulters")

plt.subplot(223)
sns.boxplot(x="TARGET", y="DAYS_BIRTH", hue="CODE_GENDER", data=application_data, palette=["b", "grey", "m"])
plt.axhline(application_data["DAYS_BIRTH"].mean(), linestyle="dashed", color="k", label="average age of client")
plt.legend(loc="lower right")
plt.title("Client age vs Loan repayment status (hue=gender)")

plt.subplot(224)
sns.boxplot(x="TARGET", y="DAYS_BIRTH", hue="NAME_CONTRACT_TYPE", data=application_data, palette=["r", "g"])
plt.axhline(application_data["DAYS_BIRTH"].mean(), linestyle="dashed", color="k", label="average age of client")
plt.legend(loc="lower right")
plt.title("Client age vs Loan repayment status (hue=contract type)")

plt.subplots_adjust(wspace=.2, hspace=.3)

fig.set_facecolor("lightgrey")
plt.show()
In [123]:
fig = plt.figure(figsize=(13,5))

plt.subplot(121)
sns.distplot(application_data[application_data["TARGET"]==0]["DAYS_EMPLOYED"],color="b")
plt.title("days employed distribution of repayers")

plt.subplot(122)
sns.distplot(application_data[application_data["TARGET"]==1]["DAYS_EMPLOYED"],color="r")
plt.title("days employed distribution of defaulters")

fig.set_facecolor("ghostwhite")
In [124]:
fig = plt.figure(figsize=(13,5))

plt.subplot(121)
sns.distplot(application_data[application_data["TARGET"]==0]["DAYS_REGISTRATION"],color="b")
plt.title("registration days distribution of repayers")

plt.subplot(122)
sns.distplot(application_data[application_data["TARGET"]==1]["DAYS_REGISTRATION"],color="r")
plt.title("registration days distribution of defaulter")

fig.set_facecolor("ghostwhite")
In [125]:
x   = application_data[['FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE',
       'FLAG_PHONE', 'FLAG_EMAIL',"TARGET"]]
x["TARGET"] = x["TARGET"].replace({0:"repayers",1:"defaulters"})
x  = x.replace({1:"YES",0:"NO"})

cols = ['FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE',
       'FLAG_PHONE', 'FLAG_EMAIL']
length = len(cols)

fig = plt.figure(figsize=(15,12))
fig.set_facecolor("lightgrey")

for i,j in itertools.zip_longest(cols,range(length)):
    plt.subplot(2,3,j+1)
    sns.countplot(x[i],hue=x["TARGET"],palette=["r","g"])
    plt.title(i,color="b")
In [126]:
fig = plt.figure(figsize=(13,13))
plt.subplot(221)
application_data[application_data["TARGET"]==0]["REGION_RATING_CLIENT"].value_counts().plot.pie(autopct = "%1.0f%%",fontsize=12,
                                                             colors = sns.color_palette("Pastel1"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)

plt.title("Distribution of region rating  for Repayers",color="b")

plt.subplot(222)
application_data[application_data["TARGET"]==1]["REGION_RATING_CLIENT"].value_counts().plot.pie(autopct = "%1.0f%%",fontsize=12,
                                                    colors = sns.color_palette("Pastel1"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)


plt.title("Distribution of region rating  for Defaulters",color="b")
plt.ylabel("")

plt.subplot(223)
application_data[application_data["TARGET"]==0]["REGION_RATING_CLIENT_W_CITY"].value_counts().plot.pie(autopct = "%1.0f%%",fontsize=12,
                                                             colors = sns.color_palette("Paired"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)

plt.title("Distribution of city region rating   for Repayers",color="b")

plt.subplot(224)
application_data[application_data["TARGET"]==1]["REGION_RATING_CLIENT_W_CITY"].value_counts().plot.pie(autopct = "%1.0f%%",fontsize=12,
                                                    colors = sns.color_palette("Paired"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)


plt.title("Distribution of city region rating  for Defaulters",color="b")
plt.ylabel("")
fig.set_facecolor("ivory")
In [129]:
day = application_data.groupby("TARGET").agg({"WEEKDAY_APPR_PROCESS_START": "value_counts"})
day = day.rename(columns={"WEEKDAY_APPR_PROCESS_START": "value_counts"}).reset_index()
day_0 = day[:7]
day_1 = day[7:]
day_0["percentage"] = day_0["value_counts"] * 100 / day_0["value_counts"].sum()
day_1["percentage"] = day_1["value_counts"] * 100 / day_1["value_counts"].sum()
days = pd.concat([day_0, day_1], axis=0)

# Create a new column based on the "TARGET" values
days["TARGET"] = days["TARGET"].map({1: "defaulters", 0: "repayers"})

fig = plt.figure(figsize=(13, 15))
plt.subplot(211)
order = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY']
ax = sns.barplot("WEEKDAY_APPR_PROCESS_START", "percentage", data=days,
                hue="TARGET", order=order, palette="prism")
ax.set_facecolor("k")
ax.set_title("Peak days for applying loans (defaulters vs repayers)")

hr = application_data.groupby("TARGET").agg({"HOUR_APPR_PROCESS_START": "value_counts"})
hr = hr.rename(columns={"HOUR_APPR_PROCESS_START": "value_counts"}).reset_index()
hr_0 = hr[hr["TARGET"] == 0]
hr_1 = hr[hr["TARGET"] == 1]
hr_0["percentage"] = hr_0["value_counts"] * 100 / hr_0["value_counts"].sum()
hr_1["percentage"] = hr_1["value_counts"] * 100 / hr_1["value_counts"].sum()
hrs = pd.concat([hr_0, hr_1], axis=0)

# Create a new column based on the "TARGET" values
hrs["TARGET"] = hrs["TARGET"].map({1: "defaulters", 0: "repayers"})

hrs = hrs.sort_values(by="HOUR_APPR_PROCESS_START", ascending=True)

plt.subplot(212)
ax1 = sns.pointplot("HOUR_APPR_PROCESS_START", "percentage", data=hrs, hue="TARGET", palette="prism")
ax1.set_facecolor("k")
ax1.set_title("Peak hours for applying loans (defaulters vs repayers")
fig.set_facecolor("snow")
In [130]:
org = application_data.groupby("TARGET").agg({"ORGANIZATION_TYPE":"value_counts"})
org = org.rename(columns = {"ORGANIZATION_TYPE":"value_counts"}).reset_index()
org_0 = org[org["TARGET"] == 0]
org_1 = org[org["TARGET"] == 1]
org_0["percentage"] = org_0["value_counts"]*100/org_0["value_counts"].sum()
org_1["percentage"] = org_1["value_counts"]*100/org_1["value_counts"].sum()

organization = pd.concat([org_0,org_1],axis=0)
organization = organization.sort_values(by="ORGANIZATION_TYPE",ascending=True)

organization["TARGET"] = organization["TARGET"].replace({0:"repayers",1:"defaulters"})

organization
plt.figure(figsize=(13,7))
ax = sns.pointplot("ORGANIZATION_TYPE","percentage",
                   data=organization,hue="TARGET",palette=["b","r"])
plt.xticks(rotation=90)
plt.grid(True,alpha=.3)
ax.set_facecolor("k")
ax.set_title("Distribution in organization types for repayers and defaulters")
plt.show()
In [131]:
fig = plt.figure(figsize=(20,20))
plt.subplot(421)
sns.boxplot(data=application_data,x='TARGET',y='OBS_30_CNT_SOCIAL_CIRCLE',
            hue="TARGET", palette="Set3")
plt.title("Client's social surroundings with observable 30 DPD (days past due) def",color="b")
plt.subplot(422)
sns.boxplot(data=application_data,x='TARGET',y='DEF_30_CNT_SOCIAL_CIRCLE',
            hue="TARGET", palette="Set3")
plt.title("Client's social surroundings defaulted on 30 DPD (days past due)",color="b")
plt.subplot(423)
sns.boxplot(data=application_data,x='TARGET',y='OBS_60_CNT_SOCIAL_CIRCLE',
            hue="TARGET", palette="Set3")
plt.title("Client's social surroundings with observable 60 DPD (days past due) default",color="b")
plt.subplot(424)
sns.boxplot(data=application_data,x='TARGET',y='DEF_60_CNT_SOCIAL_CIRCLE',
            hue="TARGET", palette="Set3")
plt.title("Client's social surroundings defaulted on 60 DPD (days past due)",color="b")
fig.set_facecolor("ghostwhite")
In [133]:
plt.figure(figsize=(13, 7))
plt.subplot(121)
ax = sns.violinplot(application_data["TARGET"],
                    application_data["DAYS_LAST_PHONE_CHANGE"], palette=["g", "r"])
ax.set_facecolor("oldlace")
ax.set_title("Days before application client changed phone - Violin Plot")
plt.subplot(122)
ax1 = sns.boxplot(x=application_data["TARGET"],
                  y=application_data["DAYS_LAST_PHONE_CHANGE"], palette=["g", "r"])
ax1.set_facecolor("oldlace")
ax1.set_ylabel("")
ax1.set_title("Days before application client changed phone - Box Plot")
plt.subplots_adjust(wspace=0.2)
In [134]:
cols = [ 'FLAG_DOCUMENT_2', 'FLAG_DOCUMENT_3',
       'FLAG_DOCUMENT_4', 'FLAG_DOCUMENT_5', 'FLAG_DOCUMENT_6',
       'FLAG_DOCUMENT_7', 'FLAG_DOCUMENT_8', 'FLAG_DOCUMENT_9',
       'FLAG_DOCUMENT_10', 'FLAG_DOCUMENT_11', 'FLAG_DOCUMENT_12',
       'FLAG_DOCUMENT_13', 'FLAG_DOCUMENT_14', 'FLAG_DOCUMENT_15',
       'FLAG_DOCUMENT_16', 'FLAG_DOCUMENT_17', 'FLAG_DOCUMENT_18',
       'FLAG_DOCUMENT_19', 'FLAG_DOCUMENT_20', 'FLAG_DOCUMENT_21']

df_flag = application_data[cols+["TARGET"]]

length = len(cols)

df_flag["TARGET"] = df_flag["TARGET"].replace({1:"defaulter",0:"repayer"})

fig = plt.figure(figsize=(13,24))
fig.set_facecolor("lightgrey")
for i,j in itertools.zip_longest(cols,range(length)):
    plt.subplot(5,4,j+1)
    ax = sns.countplot(df_flag[i],hue=df_flag["TARGET"],palette=["r","b"])
    plt.yticks(fontsize=5)
    plt.xlabel("")
    plt.title(i)
    ax.set_facecolor("k")
In [135]:
cols = ['AMT_REQ_CREDIT_BUREAU_HOUR', 'AMT_REQ_CREDIT_BUREAU_DAY',
       'AMT_REQ_CREDIT_BUREAU_WEEK', 'AMT_REQ_CREDIT_BUREAU_MON',
       'AMT_REQ_CREDIT_BUREAU_QRT', 'AMT_REQ_CREDIT_BUREAU_YEAR']
application_data.groupby("TARGET")[cols].max().transpose().plot(kind="barh",
                                                                 figsize=(10,5),width=.8)
plt.title("Maximum enquries made by defaulters and repayers")
application_data.groupby("TARGET")[cols].mean().transpose().plot(kind="barh",
                                                                  figsize=(10,5),width=.8)
plt.title("average enquries made by defaulters and repayers")
application_data.groupby("TARGET")[cols].std().transpose().plot(kind="barh",
                                                                 figsize=(10,5),width=.8)
plt.title("standard deviation in enquries made by defaulters and repayers")
plt.show()
In [136]:
x = previous_application.groupby("SK_ID_CURR")["SK_ID_PREV"].count().reset_index()
plt.figure(figsize=(13,7))
ax = sns.distplot(x["SK_ID_PREV"],color="orange")
plt.axvline(x["SK_ID_PREV"].mean(),linestyle="dashed",color="r",label="average")
plt.axvline(x["SK_ID_PREV"].std(),linestyle="dashed",color="b",label="standard deviation")
plt.axvline(x["SK_ID_PREV"].max(),linestyle="dashed",color="g",label="maximum")
plt.legend(loc="best")
plt.title("Current loan id having previous loan applications")
ax.set_facecolor("k")
In [138]:
pip install squarify
Defaulting to user installation because normal site-packages is not writeable
Collecting squarify
  Downloading squarify-0.4.3-py3-none-any.whl (4.3 kB)
Installing collected packages: squarify
Successfully installed squarify-0.4.3
Note: you may need to restart the kernel to use updated packages.
In [140]:
cnts = previous_application["NAME_CONTRACT_TYPE"].value_counts()
import squarify
plt.figure(figsize=(8,6))
squarify.plot(cnts.values,label=cnts.keys(),value=cnts.values,linewidth=2,edgecolor="k",alpha=.8,color=sns.color_palette("Set1"))
plt.axis("off")
plt.title("Contaract types in previous applications")
plt.show()
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_13572\2131491753.py in <module>
      1 cnts = previous_application["NAME_CONTRACT_TYPE"].value_counts()
----> 2 import squarify
      3 plt.figure(figsize=(8,6))
      4 squarify.plot(cnts.values,label=cnts.keys(),value=cnts.values,linewidth=2,edgecolor="k",alpha=.8,color=sns.color_palette("Set1"))
      5 plt.axis("off")

ModuleNotFoundError: No module named 'squarify'
In [141]:
plt.figure(figsize=(20,20))
plt.subplot(211)
ax = sns.kdeplot(previous_application["AMT_APPLICATION"],color="b",linewidth=3)
ax = sns.kdeplot(previous_application[previous_application["AMT_CREDIT"].notnull()]["AMT_CREDIT"],color="r",linewidth=3)
plt.axvline(previous_application[previous_application["AMT_CREDIT"].notnull()]["AMT_CREDIT"].mean(),color="r",linestyle="dashed",label="AMT_APPLICATION_MEAN")
plt.axvline(previous_application["AMT_APPLICATION"].mean(),color="b",linestyle="dashed",label="AMT_APPLICATION_MEAN")
plt.legend(loc="best")
plt.title("Previous loan amounts applied and loan amounts credited.")
ax.set_facecolor("k")

plt.subplot(212)
diff = (previous_application["AMT_CREDIT"] - previous_application["AMT_APPLICATION"]).reset_index()
diff = diff[diff[0].notnull()]
ax1 = sns.kdeplot(diff[0],color="g",linewidth=3,label = "difference in amount requested by client and amount credited")
plt.axvline(diff[0].mean(),color="white",linestyle="dashed",label = "mean")
plt.title("difference in amount requested by client and amount credited")
ax1.legend(loc="best")
ax1.set_facecolor("k")
In [142]:
mn = previous_application.groupby("NAME_CONTRACT_TYPE")[["AMT_APPLICATION","AMT_CREDIT"]].mean().stack().reset_index()
tt = previous_application.groupby("NAME_CONTRACT_TYPE")[["AMT_APPLICATION","AMT_CREDIT"]].sum().stack().reset_index()
fig = plt.figure(figsize=(10,13))
fig.set_facecolor("ghostwhite")
plt.subplot(211)
ax = sns.barplot(0,"NAME_CONTRACT_TYPE",data=mn[:6],hue="level_1",palette="inferno")
ax.set_facecolor("k")
ax.set_xlabel("average amounts")
ax.set_title("Average amounts by contract types")

plt.subplot(212)
ax1 = sns.barplot(0,"NAME_CONTRACT_TYPE",data=tt[:6],hue="level_1",palette="magma")
ax1.set_facecolor("k")
ax1.set_xlabel("total amounts")
ax1.set_title("total amounts by contract types")
plt.subplots_adjust(hspace = .2)
plt.show()
In [143]:
plt.figure(figsize=(14,5))
plt.subplot(121)
previous_application.groupby("NAME_CONTRACT_TYPE")["AMT_ANNUITY"].sum().plot(kind="bar")
plt.xticks(rotation=0)
plt.title("Total annuity amount by contract types in previous applications")
plt.subplot(122)
previous_application.groupby("NAME_CONTRACT_TYPE")["AMT_ANNUITY"].mean().plot(kind="bar")
plt.title("average annuity amount by contract types in previous applications")
plt.xticks(rotation=0)
plt.show()
In [144]:
ax = pd.crosstab(previous_application["NAME_CONTRACT_TYPE"],previous_application["NAME_CONTRACT_STATUS"]).plot(kind="barh",figsize=(10,7),stacked=True)
plt.xticks(rotation =0)
plt.ylabel("count")
plt.title("Count of application status by application type")
ax.set_facecolor("k")
In [145]:
hr = pd.crosstab(previous_application["WEEKDAY_APPR_PROCESS_START"],previous_application["NAME_CONTRACT_STATUS"]).stack().reset_index()
plt.figure(figsize=(12,8))
ax = sns.pointplot(hr["WEEKDAY_APPR_PROCESS_START"],hr[0],hue=hr["NAME_CONTRACT_STATUS"],palette=["g","r","b","orange"],scale=1)
ax.set_facecolor("k")
ax.set_ylabel("count")
ax.set_title("Contract status by weekdays")
plt.grid(True,alpha=.2)
In [146]:
hr = pd.crosstab(previous_application["HOUR_APPR_PROCESS_START"],previous_application["NAME_CONTRACT_STATUS"]).stack().reset_index()
plt.figure(figsize=(12,8))
ax = sns.pointplot(hr["HOUR_APPR_PROCESS_START"],hr[0],hue=hr["NAME_CONTRACT_STATUS"],palette=["g","r","b","orange"],scale=1)
ax.set_facecolor("k")
ax.set_ylabel("count")
ax.set_title("Contract status by day hours.")
plt.grid(True,alpha=.2)
In [147]:
hr = pd.crosstab(previous_application["HOUR_APPR_PROCESS_START"],previous_application["WEEKDAY_APPR_PROCESS_START"]).stack().reset_index()
plt.figure(figsize=(12,8))
ax = sns.pointplot(hr["HOUR_APPR_PROCESS_START"],hr[0],hue=hr["WEEKDAY_APPR_PROCESS_START"],palette=["g","r","b","orange"],scale=1)
ax.set_facecolor("k")
ax.set_ylabel("count")
ax.set_title("Peak hours for week days")
plt.grid(True,alpha=.2)
In [148]:
previous_application[["NAME_CASH_LOAN_PURPOSE","NAME_CONTRACT_STATUS"]]
purpose = pd.crosstab(previous_application["NAME_CASH_LOAN_PURPOSE"],previous_application["NAME_CONTRACT_STATUS"])
purpose["a"] = (purpose["Approved"]*100)/(purpose["Approved"]+purpose["Canceled"]+purpose["Refused"]+purpose["Unused offer"])
purpose["c"] = (purpose["Canceled"]*100)/(purpose["Approved"]+purpose["Canceled"]+purpose["Refused"]+purpose["Unused offer"])
purpose["r"] = (purpose["Refused"]*100)/(purpose["Approved"]+purpose["Canceled"]+purpose["Refused"]+purpose["Unused offer"])
purpose["u"] = (purpose["Unused offer"]*100)/(purpose["Approved"]+purpose["Canceled"]+purpose["Refused"]+purpose["Unused offer"])
purpose_new = purpose[["a","c","r","u"]]
purpose_new = purpose_new.stack().reset_index()
purpose_new["NAME_CONTRACT_STATUS"] = purpose_new["NAME_CONTRACT_STATUS"].replace({"a":"accepted_percentage","c":"cancelled_percentage",
                                                               "r":"refused_percentage","u":"unused_percentage"})

lst = purpose_new["NAME_CONTRACT_STATUS"].unique().tolist()
length = len(lst)
cs = ["lime","orange","r","b"]

fig = plt.figure(figsize=(14,18))
fig.set_facecolor("lightgrey")
for i,j,k in itertools.zip_longest(lst,range(length),cs):
    plt.subplot(2,2,j+1)
    dat = purpose_new[purpose_new["NAME_CONTRACT_STATUS"] == i]
    ax = sns.barplot(0,"NAME_CASH_LOAN_PURPOSE",data=dat.sort_values(by=0,ascending=False),color=k)
    plt.ylabel("")
    plt.xlabel("percentage")
    plt.title(i+" by purpose")
    plt.subplots_adjust(wspace = .7)
    ax.set_facecolor("k")
In [149]:
plt.figure(figsize=(13,6))
sns.violinplot(y= previous_application["DAYS_DECISION"],
               x = previous_application["NAME_CONTRACT_STATUS"],palette=["r","g","b","y"])
plt.axhline(previous_application[previous_application["NAME_CONTRACT_STATUS"] == "Approved"]["DAYS_DECISION"].mean(),
            color="r",linestyle="dashed",label="accepted_average")
plt.axhline(previous_application[previous_application["NAME_CONTRACT_STATUS"] == "Refused"]["DAYS_DECISION"].mean(),
            color="g",linestyle="dashed",label="refused_average")
plt.axhline(previous_application[previous_application["NAME_CONTRACT_STATUS"] == "Cancelled"]["DAYS_DECISION"].mean(),color="b",
            linestyle="dashed",label="cancelled_average")
plt.axhline(previous_application[previous_application["NAME_CONTRACT_STATUS"] == "Unused offer"]["DAYS_DECISION"].mean(),color="y",
            linestyle="dashed",label="un used_average")
plt.legend(loc="best")

plt.title("Contract status relative to decision made about previous application.")
plt.show()
In [150]:
plt.figure(figsize=(8,12))
plt.subplot(211)
rej = previous_application["CODE_REJECT_REASON"].value_counts().reset_index()
ax = sns.barplot("CODE_REJECT_REASON","index",data=rej[:6],palette="husl")
for i,j in enumerate(np.around((rej["CODE_REJECT_REASON"][:6].values*100/(rej["CODE_REJECT_REASON"][:6].sum())))):
    ax.text(.7,i,j,weight="bold")
plt.xlabel("Top as percentage & Bottom as Count")
plt.ylabel("CODE_REJECT_REASON")
plt.title("Reasons for application rejections")

plt.subplot(212)
pay = previous_application["NAME_PAYMENT_TYPE"].value_counts().reset_index()
ax1 = sns.barplot("NAME_PAYMENT_TYPE","index",data=pay,palette="husl")
for i,j in enumerate(np.around((pay["NAME_PAYMENT_TYPE"].values*100/(pay["NAME_PAYMENT_TYPE"].sum())))):
    ax1.text(.7,i,j,weight="bold")
plt.xlabel("pTop as percentage & Bottom as Count")
plt.ylabel("NAME_PAYMENT_TYPE")
plt.title("Clients payment methods")
plt.subplots_adjust(hspace = .3)
In [151]:
plt.figure(figsize=(20,20))
plt.subplot(121)
previous_application["NAME_TYPE_SUITE"].value_counts().plot.pie(autopct = "%1.1f%%",fontsize=12,
                                                             colors = sns.color_palette("inferno"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.title("NAME_TYPE_SUITE")

plt.subplot(122)
previous_application["NAME_CLIENT_TYPE"].value_counts().plot.pie(autopct = "%1.1f%%",fontsize=12,
                                                             colors = sns.color_palette("inferno"),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.title("NAME_CLIENT_TYPE")
plt.show()
In [152]:
goods = previous_application["NAME_GOODS_CATEGORY"].value_counts().reset_index()
goods["percentage"] = round(goods["NAME_GOODS_CATEGORY"]*100/goods["NAME_GOODS_CATEGORY"].sum(),2)
fig = plt.figure(figsize=(12,5))
ax = sns.pointplot("index","percentage",data=goods,color="yellow")
plt.xticks(rotation = 80)
plt.xlabel("NAME_GOODS_CATEGORY")
plt.ylabel("percentage")
plt.title("popular goods for applying loans")
ax.set_facecolor("k")
fig.set_facecolor('lightgrey')
In [153]:
plt.figure(figsize=(20,20))
plt.subplot(121)
previous_application["NAME_PORTFOLIO"].value_counts().plot.pie(autopct = "%1.1f%%",fontsize=12,
                                                             colors = sns.color_palette("prism",5),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},
                                                               shadow =True)
plt.title("previous applications portfolio")
plt.subplot(122)
previous_application["NAME_PRODUCT_TYPE"].value_counts().plot.pie(autopct = "%1.1f%%",fontsize=12,
                                                             colors = sns.color_palette("prism",3),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},
                                                                  shadow =True)
plt.title("previous applications product types")
plt.show()
In [154]:
app = pd.crosstab(previous_application["CHANNEL_TYPE"],previous_application["NAME_CONTRACT_STATUS"])
app1 = app
app1["approval_rate"] = app1["Approved"]*100/(app1["Approved"]+app1["Refused"]+app1["Canceled"])
app1["refused_rate"]  = app1["Refused"]*100/(app1["Approved"]+app1["Refused"]+app1["Canceled"])
app1["cacelled_rate"] = app1["Canceled"]*100/(app1["Approved"]+app1["Refused"]+app1["Canceled"])
app2 = app[["approval_rate","refused_rate","cacelled_rate"]]
ax = app2.plot(kind="barh",stacked=True,figsize=(10,7))
ax.set_facecolor("k")
ax.set_xlabel("percentage")
ax.set_title("approval,cancel and refusal rates by channel types")
plt.show()
In [155]:
fig = plt.figure(figsize=(13,5))
plt.subplot(121)
are = previous_application.groupby("SELLERPLACE_AREA")["AMT_CREDIT"].sum().reset_index()
are = are.sort_values(by ="AMT_CREDIT",ascending = False)
ax = sns.barplot(y= "AMT_CREDIT",x ="SELLERPLACE_AREA",data=are[:15],color="r")
ax.set_facecolor("k")
ax.set_title("Highest amount credited seller place areas")

plt.subplot(122)
sell = previous_application.groupby("NAME_SELLER_INDUSTRY")["AMT_CREDIT"].sum().reset_index().sort_values(by = "AMT_CREDIT",ascending = False)
ax1=sns.barplot(y = "AMT_CREDIT",x = "NAME_SELLER_INDUSTRY",data=sell,color="b")
ax1.set_facecolor("k")
ax1.set_title("Highest amount credited seller industrys")
plt.xticks(rotation=90)
plt.subplots_adjust(wspace = .5)
fig.set_facecolor("lightgrey")
In [156]:
plt.figure(figsize=(13,5))
ax = sns.countplot(previous_application["CNT_PAYMENT"],palette="Set1",order=previous_application["CNT_PAYMENT"].value_counts().index)
ax.set_facecolor("k")
plt.xticks(rotation = 90)
plt.title("popular terms of previous credit at application")
plt.show()
In [157]:
plt.figure(figsize=(10,8))
sns.countplot(y = previous_application["PRODUCT_COMBINATION"],order=previous_application["PRODUCT_COMBINATION"].value_counts().index)
plt.title("Detailed product combination of the previous application -count")
plt.show()
In [158]:
plt.figure(figsize=(12,6))
plt.subplot(121)
previous_application["NFLAG_INSURED_ON_APPROVAL"].value_counts().plot.pie(autopct = "%1.1f%%",fontsize=8,
                                                             colors = sns.color_palette("prism",4),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.title("client requesting insurance")

plt.subplot(122)
previous_application["NAME_YIELD_GROUP"].value_counts().plot.pie(autopct = "%1.1f%%",fontsize=8,
                                                             colors = sns.color_palette("prism",4),
                                              wedgeprops={"linewidth":2,"edgecolor":"white"},shadow =True)
circ = plt.Circle((0,0),.7,color="white")
plt.gca().add_artist(circ)
plt.title("interest rates")
plt.show()
In [159]:
cols = ['DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE', 'DAYS_LAST_DUE_1ST_VERSION','DAYS_LAST_DUE', 'DAYS_TERMINATION']
plt.figure(figsize=(12,6))
sns.heatmap(previous_application[cols].describe()[1:].transpose(),
            annot=True,linewidth=2,linecolor="k",cmap=sns.color_palette("inferno"))
plt.show()
In [160]:
corrmat = application_data.corr() 
  
f, ax = plt.subplots(figsize =(8, 8)) 
sns.heatmap(corrmat, ax = ax, cmap ="rainbow") 
plt.show()
In [161]:
corrmat = previous_application.corr() 
  
f, ax = plt.subplots(figsize =(8, 8)) 
sns.heatmap(corrmat, ax = ax, cmap ="rainbow") 
plt.show()
In [162]:
corrmat = previous_application.corr() 
corrdf = corrmat.where(np.triu(np.ones(corrmat.shape), k=1).astype(np.bool))
corrdf = corrdf.unstack().reset_index()
corrdf.columns = ['Var1', 'Var2', 'Correlation']
corrdf.dropna(subset = ['Correlation'], inplace = True)
corrdf['Correlation'] = round(corrdf['Correlation'], 2)
corrdf['Correlation'] = abs(corrdf['Correlation'])
corrdf.sort_values(by = 'Correlation', ascending = False).head(10)
Out[162]:
Var1 Var2 Correlation
88 AMT_GOODS_PRICE AMT_APPLICATION 1.00
89 AMT_GOODS_PRICE AMT_CREDIT 0.99
71 AMT_CREDIT AMT_APPLICATION 0.98
269 DAYS_TERMINATION DAYS_LAST_DUE 0.93
87 AMT_GOODS_PRICE AMT_ANNUITY 0.82
70 AMT_CREDIT AMT_ANNUITY 0.82
53 AMT_APPLICATION AMT_ANNUITY 0.81
232 DAYS_LAST_DUE_1ST_VERSION DAYS_FIRST_DRAWING 0.80
173 CNT_PAYMENT AMT_APPLICATION 0.68
174 CNT_PAYMENT AMT_CREDIT 0.67
In [163]:
df_repayer = application_data[application_data['TARGET'] == 0]
df_defaulter = application_data[application_data['TARGET'] == 1]
In [164]:
corrmat = df_repayer.corr() 
corrdf = corrmat.where(np.triu(np.ones(corrmat.shape), k=1).astype(np.bool))
corrdf = corrdf.unstack().reset_index()
corrdf.columns = ['Var1', 'Var2', 'Correlation']
corrdf.dropna(subset = ['Correlation'], inplace = True)
corrdf['Correlation'] = round(corrdf['Correlation'], 2)
corrdf['Correlation'] = abs(corrdf['Correlation'])
corrdf.sort_values(by = 'Correlation', ascending = False).head(10)
Out[164]:
Var1 Var2 Correlation
776 FLAG_EMP_PHONE DAYS_EMPLOYED 1.00
1798 OBS_60_CNT_SOCIAL_CIRCLE OBS_30_CNT_SOCIAL_CIRCLE 1.00
358 AMT_GOODS_PRICE AMT_CREDIT 0.99
1199 REGION_RATING_CLIENT_W_CITY REGION_RATING_CLIENT 0.95
1064 CNT_FAM_MEMBERS CNT_CHILDREN 0.88
1858 DEF_60_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE 0.86
1439 LIVE_REGION_NOT_WORK_REGION REG_REGION_NOT_WORK_REGION 0.86
1619 LIVE_CITY_NOT_WORK_CITY REG_CITY_NOT_WORK_CITY 0.83
359 AMT_GOODS_PRICE AMT_ANNUITY 0.78
299 AMT_ANNUITY AMT_CREDIT 0.77
In [165]:
corrmat = df_defaulter.corr() 
corrdf = corrmat.where(np.triu(np.ones(corrmat.shape), k=1).astype(np.bool))
corrdf = corrdf.unstack().reset_index()
corrdf.columns = ['Var1', 'Var2', 'Correlation']
corrdf.dropna(subset = ['Correlation'], inplace = True)
corrdf['Correlation'] = round(corrdf['Correlation'], 2)
corrdf['Correlation'] = abs(corrdf['Correlation'])
corrdf.sort_values(by = 'Correlation', ascending = False).head(10)
Out[165]:
Var1 Var2 Correlation
1798 OBS_60_CNT_SOCIAL_CIRCLE OBS_30_CNT_SOCIAL_CIRCLE 1.00
776 FLAG_EMP_PHONE DAYS_EMPLOYED 1.00
358 AMT_GOODS_PRICE AMT_CREDIT 0.98
1199 REGION_RATING_CLIENT_W_CITY REGION_RATING_CLIENT 0.96
1064 CNT_FAM_MEMBERS CNT_CHILDREN 0.89
1858 DEF_60_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE 0.87
1439 LIVE_REGION_NOT_WORK_REGION REG_REGION_NOT_WORK_REGION 0.85
1619 LIVE_CITY_NOT_WORK_CITY REG_CITY_NOT_WORK_CITY 0.78
299 AMT_ANNUITY AMT_CREDIT 0.75
359 AMT_GOODS_PRICE AMT_ANNUITY 0.75
In [166]:
mergeddf =  pd.merge(application_data,previous_application,on='SK_ID_CURR')
mergeddf.head()
Out[166]:
SK_ID_CURR TARGET NAME_CONTRACT_TYPE_x CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x NAME_TYPE_SUITE_x NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START_x HOUR_APPR_PROCESS_START_x REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR SK_ID_PREV NAME_CONTRACT_TYPE_y AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y WEEKDAY_APPR_PROCESS_START_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE_y NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL
0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 1038818 Consumer loans 9251.775 179055.0 179055.0 179055.0 SATURDAY 9 Y 1 XAP Approved -606 XNA XAP NaN New Vehicles POS XNA Stone 500 Auto technology 24.0 low_normal POS other with interest 365243.0 -565.0 125.0 -25.0 -17.0 0.0
1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1810518 Cash loans 98356.995 900000.0 1035882.0 900000.0 FRIDAY 12 Y 1 XNA Approved -746 XNA XAP Unaccompanied Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 low_normal Cash X-Sell: low 365243.0 -716.0 -386.0 -536.0 -527.0 1.0
2 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2636178 Consumer loans 64567.665 337500.0 348637.5 337500.0 SUNDAY 17 Y 1 XAP Approved -828 Cash through the bank XAP Family Refreshed Furniture POS XNA Stone 1400 Furniture 6.0 middle POS industry with interest 365243.0 -797.0 -647.0 -647.0 -639.0 0.0
3 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2396755 Consumer loans 6737.310 68809.5 68053.5 68809.5 SATURDAY 15 Y 1 XAP Approved -2341 Cash through the bank XAP Family Refreshed Consumer Electronics POS XNA Country-wide 200 Consumer electronics 12.0 middle POS household with interest 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 1.0
4 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1564014 Consumer loans 5357.250 24282.0 20106.0 24282.0 FRIDAY 5 Y 1 XAP Approved -815 Cash through the bank XAP Unaccompanied New Mobile POS XNA Regional / Local 30 Connectivity 4.0 middle POS mobile without interest 365243.0 -784.0 -694.0 -724.0 -714.0 0.0
In [167]:
y = mergeddf.groupby('SK_ID_CURR').size()
dfA = mergeddf.groupby('SK_ID_CURR').agg({'TARGET': np.sum})
dfA['count'] = y
display(dfA.head(10))
TARGET count
SK_ID_CURR
100002 1 1
100003 0 3
100004 0 1
100006 0 9
100007 0 6
100008 0 5
100009 0 7
100010 0 1
100011 0 4
100012 0 4
In [168]:
dfA.sort_values(by = 'count',ascending=False).head(10)
Out[168]:
TARGET count
SK_ID_CURR
265681 0 73
173680 0 72
242412 0 68
206783 0 67
389950 0 64
382179 0 64
198355 0 63
345161 0 62
446486 0 62
238250 0 61
In [169]:
df_repayer = dfA[dfA['TARGET'] == 0]
df_defaulter = dfA[dfA['TARGET'] == 1]
In [170]:
df_repayer.sort_values(by = 'count',ascending=False).head(10)
Out[170]:
TARGET count
SK_ID_CURR
265681 0 73
173680 0 72
242412 0 68
206783 0 67
382179 0 64
389950 0 64
198355 0 63
446486 0 62
345161 0 62
280586 0 61
In [171]:
df_defaulter.sort_values(by = 'count',ascending=False).head(10)
Out[171]:
TARGET count
SK_ID_CURR
100002 1 1
333349 1 1
333587 1 1
333582 1 1
333534 1 1
333506 1 1
333419 1 1
333355 1 1
333337 1 1
334761 1 1
In [172]:
mergeddf.isnull().sum()
Out[172]:
SK_ID_CURR                      0     
TARGET                          0     
NAME_CONTRACT_TYPE_x            0     
CODE_GENDER                     0     
FLAG_OWN_CAR                    0     
FLAG_OWN_REALTY                 0     
CNT_CHILDREN                    0     
AMT_INCOME_TOTAL                0     
AMT_CREDIT_x                    0     
AMT_ANNUITY_x                   93    
AMT_GOODS_PRICE_x               1208  
NAME_TYPE_SUITE_x               3526  
NAME_INCOME_TYPE                0     
NAME_EDUCATION_TYPE             0     
NAME_FAMILY_STATUS              0     
NAME_HOUSING_TYPE               0     
REGION_POPULATION_RELATIVE      0     
DAYS_BIRTH                      0     
DAYS_EMPLOYED                   0     
DAYS_REGISTRATION               0     
DAYS_ID_PUBLISH                 0     
FLAG_MOBIL                      0     
FLAG_EMP_PHONE                  0     
FLAG_WORK_PHONE                 0     
FLAG_CONT_MOBILE                0     
FLAG_PHONE                      0     
FLAG_EMAIL                      0     
CNT_FAM_MEMBERS                 0     
REGION_RATING_CLIENT            0     
REGION_RATING_CLIENT_W_CITY     0     
WEEKDAY_APPR_PROCESS_START_x    0     
HOUR_APPR_PROCESS_START_x       0     
REG_REGION_NOT_LIVE_REGION      0     
REG_REGION_NOT_WORK_REGION      0     
LIVE_REGION_NOT_WORK_REGION     0     
REG_CITY_NOT_LIVE_CITY          0     
REG_CITY_NOT_WORK_CITY          0     
LIVE_CITY_NOT_WORK_CITY         0     
ORGANIZATION_TYPE               0     
OBS_30_CNT_SOCIAL_CIRCLE        3146  
DEF_30_CNT_SOCIAL_CIRCLE        3146  
OBS_60_CNT_SOCIAL_CIRCLE        3146  
DEF_60_CNT_SOCIAL_CIRCLE        3146  
DAYS_LAST_PHONE_CHANGE          0     
FLAG_DOCUMENT_2                 0     
FLAG_DOCUMENT_3                 0     
FLAG_DOCUMENT_4                 0     
FLAG_DOCUMENT_5                 0     
FLAG_DOCUMENT_6                 0     
FLAG_DOCUMENT_7                 0     
FLAG_DOCUMENT_8                 0     
FLAG_DOCUMENT_9                 0     
FLAG_DOCUMENT_10                0     
FLAG_DOCUMENT_11                0     
FLAG_DOCUMENT_12                0     
FLAG_DOCUMENT_13                0     
FLAG_DOCUMENT_14                0     
FLAG_DOCUMENT_15                0     
FLAG_DOCUMENT_16                0     
FLAG_DOCUMENT_17                0     
FLAG_DOCUMENT_18                0     
FLAG_DOCUMENT_19                0     
FLAG_DOCUMENT_20                0     
FLAG_DOCUMENT_21                0     
AMT_REQ_CREDIT_BUREAU_HOUR      163627
AMT_REQ_CREDIT_BUREAU_DAY       163627
AMT_REQ_CREDIT_BUREAU_WEEK      163627
AMT_REQ_CREDIT_BUREAU_MON       163627
AMT_REQ_CREDIT_BUREAU_QRT       163627
AMT_REQ_CREDIT_BUREAU_YEAR      163627
SK_ID_PREV                      0     
NAME_CONTRACT_TYPE_y            0     
AMT_ANNUITY_y                   307218
AMT_APPLICATION                 0     
AMT_CREDIT_y                    1     
AMT_GOODS_PRICE_y               319525
WEEKDAY_APPR_PROCESS_START_y    0     
HOUR_APPR_PROCESS_START_y       0     
FLAG_LAST_APPL_PER_CONTRACT     0     
NFLAG_LAST_APPL_IN_DAY          0     
NAME_CASH_LOAN_PURPOSE          0     
NAME_CONTRACT_STATUS            0     
DAYS_DECISION                   0     
NAME_PAYMENT_TYPE               0     
CODE_REJECT_REASON              0     
NAME_TYPE_SUITE_y               694672
NAME_CLIENT_TYPE                0     
NAME_GOODS_CATEGORY             0     
NAME_PORTFOLIO                  0     
NAME_PRODUCT_TYPE               0     
CHANNEL_TYPE                    0     
SELLERPLACE_AREA                0     
NAME_SELLER_INDUSTRY            0     
CNT_PAYMENT                     307213
NAME_YIELD_GROUP                0     
PRODUCT_COMBINATION             313   
DAYS_FIRST_DRAWING              561106
DAYS_FIRST_DUE                  561106
DAYS_LAST_DUE_1ST_VERSION       561106
DAYS_LAST_DUE                   561106
DAYS_TERMINATION                561106
NFLAG_INSURED_ON_APPROVAL       561106
dtype: int64
In [173]:
round(100*(mergeddf.isnull().sum()/len(mergeddf.index)), 2)
Out[173]:
SK_ID_CURR                      0.00 
TARGET                          0.00 
NAME_CONTRACT_TYPE_x            0.00 
CODE_GENDER                     0.00 
FLAG_OWN_CAR                    0.00 
FLAG_OWN_REALTY                 0.00 
CNT_CHILDREN                    0.00 
AMT_INCOME_TOTAL                0.00 
AMT_CREDIT_x                    0.00 
AMT_ANNUITY_x                   0.01 
AMT_GOODS_PRICE_x               0.09 
NAME_TYPE_SUITE_x               0.25 
NAME_INCOME_TYPE                0.00 
NAME_EDUCATION_TYPE             0.00 
NAME_FAMILY_STATUS              0.00 
NAME_HOUSING_TYPE               0.00 
REGION_POPULATION_RELATIVE      0.00 
DAYS_BIRTH                      0.00 
DAYS_EMPLOYED                   0.00 
DAYS_REGISTRATION               0.00 
DAYS_ID_PUBLISH                 0.00 
FLAG_MOBIL                      0.00 
FLAG_EMP_PHONE                  0.00 
FLAG_WORK_PHONE                 0.00 
FLAG_CONT_MOBILE                0.00 
FLAG_PHONE                      0.00 
FLAG_EMAIL                      0.00 
CNT_FAM_MEMBERS                 0.00 
REGION_RATING_CLIENT            0.00 
REGION_RATING_CLIENT_W_CITY     0.00 
WEEKDAY_APPR_PROCESS_START_x    0.00 
HOUR_APPR_PROCESS_START_x       0.00 
REG_REGION_NOT_LIVE_REGION      0.00 
REG_REGION_NOT_WORK_REGION      0.00 
LIVE_REGION_NOT_WORK_REGION     0.00 
REG_CITY_NOT_LIVE_CITY          0.00 
REG_CITY_NOT_WORK_CITY          0.00 
LIVE_CITY_NOT_WORK_CITY         0.00 
ORGANIZATION_TYPE               0.00 
OBS_30_CNT_SOCIAL_CIRCLE        0.22 
DEF_30_CNT_SOCIAL_CIRCLE        0.22 
OBS_60_CNT_SOCIAL_CIRCLE        0.22 
DEF_60_CNT_SOCIAL_CIRCLE        0.22 
DAYS_LAST_PHONE_CHANGE          0.00 
FLAG_DOCUMENT_2                 0.00 
FLAG_DOCUMENT_3                 0.00 
FLAG_DOCUMENT_4                 0.00 
FLAG_DOCUMENT_5                 0.00 
FLAG_DOCUMENT_6                 0.00 
FLAG_DOCUMENT_7                 0.00 
FLAG_DOCUMENT_8                 0.00 
FLAG_DOCUMENT_9                 0.00 
FLAG_DOCUMENT_10                0.00 
FLAG_DOCUMENT_11                0.00 
FLAG_DOCUMENT_12                0.00 
FLAG_DOCUMENT_13                0.00 
FLAG_DOCUMENT_14                0.00 
FLAG_DOCUMENT_15                0.00 
FLAG_DOCUMENT_16                0.00 
FLAG_DOCUMENT_17                0.00 
FLAG_DOCUMENT_18                0.00 
FLAG_DOCUMENT_19                0.00 
FLAG_DOCUMENT_20                0.00 
FLAG_DOCUMENT_21                0.00 
AMT_REQ_CREDIT_BUREAU_HOUR      11.57
AMT_REQ_CREDIT_BUREAU_DAY       11.57
AMT_REQ_CREDIT_BUREAU_WEEK      11.57
AMT_REQ_CREDIT_BUREAU_MON       11.57
AMT_REQ_CREDIT_BUREAU_QRT       11.57
AMT_REQ_CREDIT_BUREAU_YEAR      11.57
SK_ID_PREV                      0.00 
NAME_CONTRACT_TYPE_y            0.00 
AMT_ANNUITY_y                   21.73
AMT_APPLICATION                 0.00 
AMT_CREDIT_y                    0.00 
AMT_GOODS_PRICE_y               22.60
WEEKDAY_APPR_PROCESS_START_y    0.00 
HOUR_APPR_PROCESS_START_y       0.00 
FLAG_LAST_APPL_PER_CONTRACT     0.00 
NFLAG_LAST_APPL_IN_DAY          0.00 
NAME_CASH_LOAN_PURPOSE          0.00 
NAME_CONTRACT_STATUS            0.00 
DAYS_DECISION                   0.00 
NAME_PAYMENT_TYPE               0.00 
CODE_REJECT_REASON              0.00 
NAME_TYPE_SUITE_y               49.14
NAME_CLIENT_TYPE                0.00 
NAME_GOODS_CATEGORY             0.00 
NAME_PORTFOLIO                  0.00 
NAME_PRODUCT_TYPE               0.00 
CHANNEL_TYPE                    0.00 
SELLERPLACE_AREA                0.00 
NAME_SELLER_INDUSTRY            0.00 
CNT_PAYMENT                     21.73
NAME_YIELD_GROUP                0.00 
PRODUCT_COMBINATION             0.02 
DAYS_FIRST_DRAWING              39.69
DAYS_FIRST_DUE                  39.69
DAYS_LAST_DUE_1ST_VERSION       39.69
DAYS_LAST_DUE                   39.69
DAYS_TERMINATION                39.69
NFLAG_INSURED_ON_APPROVAL       39.69
dtype: float64
In [174]:
mergeddf.head()
Out[174]:
SK_ID_CURR TARGET NAME_CONTRACT_TYPE_x CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x NAME_TYPE_SUITE_x NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START_x HOUR_APPR_PROCESS_START_x REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR SK_ID_PREV NAME_CONTRACT_TYPE_y AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y WEEKDAY_APPR_PROCESS_START_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE_y NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL
0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 1038818 Consumer loans 9251.775 179055.0 179055.0 179055.0 SATURDAY 9 Y 1 XAP Approved -606 XNA XAP NaN New Vehicles POS XNA Stone 500 Auto technology 24.0 low_normal POS other with interest 365243.0 -565.0 125.0 -25.0 -17.0 0.0
1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1810518 Cash loans 98356.995 900000.0 1035882.0 900000.0 FRIDAY 12 Y 1 XNA Approved -746 XNA XAP Unaccompanied Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 low_normal Cash X-Sell: low 365243.0 -716.0 -386.0 -536.0 -527.0 1.0
2 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2636178 Consumer loans 64567.665 337500.0 348637.5 337500.0 SUNDAY 17 Y 1 XAP Approved -828 Cash through the bank XAP Family Refreshed Furniture POS XNA Stone 1400 Furniture 6.0 middle POS industry with interest 365243.0 -797.0 -647.0 -647.0 -639.0 0.0
3 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2396755 Consumer loans 6737.310 68809.5 68053.5 68809.5 SATURDAY 15 Y 1 XAP Approved -2341 Cash through the bank XAP Family Refreshed Consumer Electronics POS XNA Country-wide 200 Consumer electronics 12.0 middle POS household with interest 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 1.0
4 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1564014 Consumer loans 5357.250 24282.0 20106.0 24282.0 FRIDAY 5 Y 1 XAP Approved -815 Cash through the bank XAP Unaccompanied New Mobile POS XNA Regional / Local 30 Connectivity 4.0 middle POS mobile without interest 365243.0 -784.0 -694.0 -724.0 -714.0 0.0
In [175]:
mergeddf.drop(['SK_ID_CURR'], 1, inplace = True)
In [176]:
mergeddf.head()
Out[176]:
TARGET NAME_CONTRACT_TYPE_x CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x NAME_TYPE_SUITE_x NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START_x HOUR_APPR_PROCESS_START_x REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR SK_ID_PREV NAME_CONTRACT_TYPE_y AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y WEEKDAY_APPR_PROCESS_START_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE_y NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL
0 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 1038818 Consumer loans 9251.775 179055.0 179055.0 179055.0 SATURDAY 9 Y 1 XAP Approved -606 XNA XAP NaN New Vehicles POS XNA Stone 500 Auto technology 24.0 low_normal POS other with interest 365243.0 -565.0 125.0 -25.0 -17.0 0.0
1 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1810518 Cash loans 98356.995 900000.0 1035882.0 900000.0 FRIDAY 12 Y 1 XNA Approved -746 XNA XAP Unaccompanied Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 low_normal Cash X-Sell: low 365243.0 -716.0 -386.0 -536.0 -527.0 1.0
2 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2636178 Consumer loans 64567.665 337500.0 348637.5 337500.0 SUNDAY 17 Y 1 XAP Approved -828 Cash through the bank XAP Family Refreshed Furniture POS XNA Stone 1400 Furniture 6.0 middle POS industry with interest 365243.0 -797.0 -647.0 -647.0 -639.0 0.0
3 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2396755 Consumer loans 6737.310 68809.5 68053.5 68809.5 SATURDAY 15 Y 1 XAP Approved -2341 Cash through the bank XAP Family Refreshed Consumer Electronics POS XNA Country-wide 200 Consumer electronics 12.0 middle POS household with interest 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 1.0
4 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1564014 Consumer loans 5357.250 24282.0 20106.0 24282.0 FRIDAY 5 Y 1 XAP Approved -815 Cash through the bank XAP Unaccompanied New Mobile POS XNA Regional / Local 30 Connectivity 4.0 middle POS mobile without interest 365243.0 -784.0 -694.0 -724.0 -714.0 0.0
In [177]:
round(100*(mergeddf.isnull().sum()/len(mergeddf.index)), 2)
Out[177]:
TARGET                          0.00 
NAME_CONTRACT_TYPE_x            0.00 
CODE_GENDER                     0.00 
FLAG_OWN_CAR                    0.00 
FLAG_OWN_REALTY                 0.00 
CNT_CHILDREN                    0.00 
AMT_INCOME_TOTAL                0.00 
AMT_CREDIT_x                    0.00 
AMT_ANNUITY_x                   0.01 
AMT_GOODS_PRICE_x               0.09 
NAME_TYPE_SUITE_x               0.25 
NAME_INCOME_TYPE                0.00 
NAME_EDUCATION_TYPE             0.00 
NAME_FAMILY_STATUS              0.00 
NAME_HOUSING_TYPE               0.00 
REGION_POPULATION_RELATIVE      0.00 
DAYS_BIRTH                      0.00 
DAYS_EMPLOYED                   0.00 
DAYS_REGISTRATION               0.00 
DAYS_ID_PUBLISH                 0.00 
FLAG_MOBIL                      0.00 
FLAG_EMP_PHONE                  0.00 
FLAG_WORK_PHONE                 0.00 
FLAG_CONT_MOBILE                0.00 
FLAG_PHONE                      0.00 
FLAG_EMAIL                      0.00 
CNT_FAM_MEMBERS                 0.00 
REGION_RATING_CLIENT            0.00 
REGION_RATING_CLIENT_W_CITY     0.00 
WEEKDAY_APPR_PROCESS_START_x    0.00 
HOUR_APPR_PROCESS_START_x       0.00 
REG_REGION_NOT_LIVE_REGION      0.00 
REG_REGION_NOT_WORK_REGION      0.00 
LIVE_REGION_NOT_WORK_REGION     0.00 
REG_CITY_NOT_LIVE_CITY          0.00 
REG_CITY_NOT_WORK_CITY          0.00 
LIVE_CITY_NOT_WORK_CITY         0.00 
ORGANIZATION_TYPE               0.00 
OBS_30_CNT_SOCIAL_CIRCLE        0.22 
DEF_30_CNT_SOCIAL_CIRCLE        0.22 
OBS_60_CNT_SOCIAL_CIRCLE        0.22 
DEF_60_CNT_SOCIAL_CIRCLE        0.22 
DAYS_LAST_PHONE_CHANGE          0.00 
FLAG_DOCUMENT_2                 0.00 
FLAG_DOCUMENT_3                 0.00 
FLAG_DOCUMENT_4                 0.00 
FLAG_DOCUMENT_5                 0.00 
FLAG_DOCUMENT_6                 0.00 
FLAG_DOCUMENT_7                 0.00 
FLAG_DOCUMENT_8                 0.00 
FLAG_DOCUMENT_9                 0.00 
FLAG_DOCUMENT_10                0.00 
FLAG_DOCUMENT_11                0.00 
FLAG_DOCUMENT_12                0.00 
FLAG_DOCUMENT_13                0.00 
FLAG_DOCUMENT_14                0.00 
FLAG_DOCUMENT_15                0.00 
FLAG_DOCUMENT_16                0.00 
FLAG_DOCUMENT_17                0.00 
FLAG_DOCUMENT_18                0.00 
FLAG_DOCUMENT_19                0.00 
FLAG_DOCUMENT_20                0.00 
FLAG_DOCUMENT_21                0.00 
AMT_REQ_CREDIT_BUREAU_HOUR      11.57
AMT_REQ_CREDIT_BUREAU_DAY       11.57
AMT_REQ_CREDIT_BUREAU_WEEK      11.57
AMT_REQ_CREDIT_BUREAU_MON       11.57
AMT_REQ_CREDIT_BUREAU_QRT       11.57
AMT_REQ_CREDIT_BUREAU_YEAR      11.57
SK_ID_PREV                      0.00 
NAME_CONTRACT_TYPE_y            0.00 
AMT_ANNUITY_y                   21.73
AMT_APPLICATION                 0.00 
AMT_CREDIT_y                    0.00 
AMT_GOODS_PRICE_y               22.60
WEEKDAY_APPR_PROCESS_START_y    0.00 
HOUR_APPR_PROCESS_START_y       0.00 
FLAG_LAST_APPL_PER_CONTRACT     0.00 
NFLAG_LAST_APPL_IN_DAY          0.00 
NAME_CASH_LOAN_PURPOSE          0.00 
NAME_CONTRACT_STATUS            0.00 
DAYS_DECISION                   0.00 
NAME_PAYMENT_TYPE               0.00 
CODE_REJECT_REASON              0.00 
NAME_TYPE_SUITE_y               49.14
NAME_CLIENT_TYPE                0.00 
NAME_GOODS_CATEGORY             0.00 
NAME_PORTFOLIO                  0.00 
NAME_PRODUCT_TYPE               0.00 
CHANNEL_TYPE                    0.00 
SELLERPLACE_AREA                0.00 
NAME_SELLER_INDUSTRY            0.00 
CNT_PAYMENT                     21.73
NAME_YIELD_GROUP                0.00 
PRODUCT_COMBINATION             0.02 
DAYS_FIRST_DRAWING              39.69
DAYS_FIRST_DUE                  39.69
DAYS_LAST_DUE_1ST_VERSION       39.69
DAYS_LAST_DUE                   39.69
DAYS_TERMINATION                39.69
NFLAG_INSURED_ON_APPROVAL       39.69
dtype: float64
In [178]:
enq_cs =['AMT_REQ_CREDIT_BUREAU_DAY', 'AMT_REQ_CREDIT_BUREAU_HOUR',
       'AMT_REQ_CREDIT_BUREAU_MON', 'AMT_REQ_CREDIT_BUREAU_QRT',
       'AMT_REQ_CREDIT_BUREAU_WEEK', 'AMT_REQ_CREDIT_BUREAU_YEAR']
for i in enq_cs:
    mergeddf[i] = mergeddf[i].fillna(0)
In [179]:
amt_cs = ["AMT_ANNUITY_y","AMT_GOODS_PRICE_y"]
for i in amt_cs:
    mergeddf[i] = mergeddf[i].fillna(mergeddf[i].mean())
In [180]:
cols = ["DAYS_FIRST_DRAWING","DAYS_FIRST_DUE","DAYS_LAST_DUE_1ST_VERSION",
        "DAYS_LAST_DUE","DAYS_TERMINATION",'CNT_PAYMENT']
for i in cols :
    mergeddf[i]  = mergeddf[i].fillna(mergeddf[i].median())
In [181]:
cols = ["NAME_TYPE_SUITE_y","NFLAG_INSURED_ON_APPROVAL"]
for i in cols :
    mergeddf[i]  = mergeddf[i].fillna(mergeddf[i].mode()[0])
In [182]:
mergeddf.dropna(inplace = True)
In [183]:
round(100*(mergeddf.isnull().sum()/len(mergeddf.index)), 2)
Out[183]:
TARGET                          0.0
NAME_CONTRACT_TYPE_x            0.0
CODE_GENDER                     0.0
FLAG_OWN_CAR                    0.0
FLAG_OWN_REALTY                 0.0
CNT_CHILDREN                    0.0
AMT_INCOME_TOTAL                0.0
AMT_CREDIT_x                    0.0
AMT_ANNUITY_x                   0.0
AMT_GOODS_PRICE_x               0.0
NAME_TYPE_SUITE_x               0.0
NAME_INCOME_TYPE                0.0
NAME_EDUCATION_TYPE             0.0
NAME_FAMILY_STATUS              0.0
NAME_HOUSING_TYPE               0.0
REGION_POPULATION_RELATIVE      0.0
DAYS_BIRTH                      0.0
DAYS_EMPLOYED                   0.0
DAYS_REGISTRATION               0.0
DAYS_ID_PUBLISH                 0.0
FLAG_MOBIL                      0.0
FLAG_EMP_PHONE                  0.0
FLAG_WORK_PHONE                 0.0
FLAG_CONT_MOBILE                0.0
FLAG_PHONE                      0.0
FLAG_EMAIL                      0.0
CNT_FAM_MEMBERS                 0.0
REGION_RATING_CLIENT            0.0
REGION_RATING_CLIENT_W_CITY     0.0
WEEKDAY_APPR_PROCESS_START_x    0.0
HOUR_APPR_PROCESS_START_x       0.0
REG_REGION_NOT_LIVE_REGION      0.0
REG_REGION_NOT_WORK_REGION      0.0
LIVE_REGION_NOT_WORK_REGION     0.0
REG_CITY_NOT_LIVE_CITY          0.0
REG_CITY_NOT_WORK_CITY          0.0
LIVE_CITY_NOT_WORK_CITY         0.0
ORGANIZATION_TYPE               0.0
OBS_30_CNT_SOCIAL_CIRCLE        0.0
DEF_30_CNT_SOCIAL_CIRCLE        0.0
OBS_60_CNT_SOCIAL_CIRCLE        0.0
DEF_60_CNT_SOCIAL_CIRCLE        0.0
DAYS_LAST_PHONE_CHANGE          0.0
FLAG_DOCUMENT_2                 0.0
FLAG_DOCUMENT_3                 0.0
FLAG_DOCUMENT_4                 0.0
FLAG_DOCUMENT_5                 0.0
FLAG_DOCUMENT_6                 0.0
FLAG_DOCUMENT_7                 0.0
FLAG_DOCUMENT_8                 0.0
FLAG_DOCUMENT_9                 0.0
FLAG_DOCUMENT_10                0.0
FLAG_DOCUMENT_11                0.0
FLAG_DOCUMENT_12                0.0
FLAG_DOCUMENT_13                0.0
FLAG_DOCUMENT_14                0.0
FLAG_DOCUMENT_15                0.0
FLAG_DOCUMENT_16                0.0
FLAG_DOCUMENT_17                0.0
FLAG_DOCUMENT_18                0.0
FLAG_DOCUMENT_19                0.0
FLAG_DOCUMENT_20                0.0
FLAG_DOCUMENT_21                0.0
AMT_REQ_CREDIT_BUREAU_HOUR      0.0
AMT_REQ_CREDIT_BUREAU_DAY       0.0
AMT_REQ_CREDIT_BUREAU_WEEK      0.0
AMT_REQ_CREDIT_BUREAU_MON       0.0
AMT_REQ_CREDIT_BUREAU_QRT       0.0
AMT_REQ_CREDIT_BUREAU_YEAR      0.0
SK_ID_PREV                      0.0
NAME_CONTRACT_TYPE_y            0.0
AMT_ANNUITY_y                   0.0
AMT_APPLICATION                 0.0
AMT_CREDIT_y                    0.0
AMT_GOODS_PRICE_y               0.0
WEEKDAY_APPR_PROCESS_START_y    0.0
HOUR_APPR_PROCESS_START_y       0.0
FLAG_LAST_APPL_PER_CONTRACT     0.0
NFLAG_LAST_APPL_IN_DAY          0.0
NAME_CASH_LOAN_PURPOSE          0.0
NAME_CONTRACT_STATUS            0.0
DAYS_DECISION                   0.0
NAME_PAYMENT_TYPE               0.0
CODE_REJECT_REASON              0.0
NAME_TYPE_SUITE_y               0.0
NAME_CLIENT_TYPE                0.0
NAME_GOODS_CATEGORY             0.0
NAME_PORTFOLIO                  0.0
NAME_PRODUCT_TYPE               0.0
CHANNEL_TYPE                    0.0
SELLERPLACE_AREA                0.0
NAME_SELLER_INDUSTRY            0.0
CNT_PAYMENT                     0.0
NAME_YIELD_GROUP                0.0
PRODUCT_COMBINATION             0.0
DAYS_FIRST_DRAWING              0.0
DAYS_FIRST_DUE                  0.0
DAYS_LAST_DUE_1ST_VERSION       0.0
DAYS_LAST_DUE                   0.0
DAYS_TERMINATION                0.0
NFLAG_INSURED_ON_APPROVAL       0.0
dtype: float64
In [184]:
mergeddf.isnull().sum()
Out[184]:
TARGET                          0
NAME_CONTRACT_TYPE_x            0
CODE_GENDER                     0
FLAG_OWN_CAR                    0
FLAG_OWN_REALTY                 0
CNT_CHILDREN                    0
AMT_INCOME_TOTAL                0
AMT_CREDIT_x                    0
AMT_ANNUITY_x                   0
AMT_GOODS_PRICE_x               0
NAME_TYPE_SUITE_x               0
NAME_INCOME_TYPE                0
NAME_EDUCATION_TYPE             0
NAME_FAMILY_STATUS              0
NAME_HOUSING_TYPE               0
REGION_POPULATION_RELATIVE      0
DAYS_BIRTH                      0
DAYS_EMPLOYED                   0
DAYS_REGISTRATION               0
DAYS_ID_PUBLISH                 0
FLAG_MOBIL                      0
FLAG_EMP_PHONE                  0
FLAG_WORK_PHONE                 0
FLAG_CONT_MOBILE                0
FLAG_PHONE                      0
FLAG_EMAIL                      0
CNT_FAM_MEMBERS                 0
REGION_RATING_CLIENT            0
REGION_RATING_CLIENT_W_CITY     0
WEEKDAY_APPR_PROCESS_START_x    0
HOUR_APPR_PROCESS_START_x       0
REG_REGION_NOT_LIVE_REGION      0
REG_REGION_NOT_WORK_REGION      0
LIVE_REGION_NOT_WORK_REGION     0
REG_CITY_NOT_LIVE_CITY          0
REG_CITY_NOT_WORK_CITY          0
LIVE_CITY_NOT_WORK_CITY         0
ORGANIZATION_TYPE               0
OBS_30_CNT_SOCIAL_CIRCLE        0
DEF_30_CNT_SOCIAL_CIRCLE        0
OBS_60_CNT_SOCIAL_CIRCLE        0
DEF_60_CNT_SOCIAL_CIRCLE        0
DAYS_LAST_PHONE_CHANGE          0
FLAG_DOCUMENT_2                 0
FLAG_DOCUMENT_3                 0
FLAG_DOCUMENT_4                 0
FLAG_DOCUMENT_5                 0
FLAG_DOCUMENT_6                 0
FLAG_DOCUMENT_7                 0
FLAG_DOCUMENT_8                 0
FLAG_DOCUMENT_9                 0
FLAG_DOCUMENT_10                0
FLAG_DOCUMENT_11                0
FLAG_DOCUMENT_12                0
FLAG_DOCUMENT_13                0
FLAG_DOCUMENT_14                0
FLAG_DOCUMENT_15                0
FLAG_DOCUMENT_16                0
FLAG_DOCUMENT_17                0
FLAG_DOCUMENT_18                0
FLAG_DOCUMENT_19                0
FLAG_DOCUMENT_20                0
FLAG_DOCUMENT_21                0
AMT_REQ_CREDIT_BUREAU_HOUR      0
AMT_REQ_CREDIT_BUREAU_DAY       0
AMT_REQ_CREDIT_BUREAU_WEEK      0
AMT_REQ_CREDIT_BUREAU_MON       0
AMT_REQ_CREDIT_BUREAU_QRT       0
AMT_REQ_CREDIT_BUREAU_YEAR      0
SK_ID_PREV                      0
NAME_CONTRACT_TYPE_y            0
AMT_ANNUITY_y                   0
AMT_APPLICATION                 0
AMT_CREDIT_y                    0
AMT_GOODS_PRICE_y               0
WEEKDAY_APPR_PROCESS_START_y    0
HOUR_APPR_PROCESS_START_y       0
FLAG_LAST_APPL_PER_CONTRACT     0
NFLAG_LAST_APPL_IN_DAY          0
NAME_CASH_LOAN_PURPOSE          0
NAME_CONTRACT_STATUS            0
DAYS_DECISION                   0
NAME_PAYMENT_TYPE               0
CODE_REJECT_REASON              0
NAME_TYPE_SUITE_y               0
NAME_CLIENT_TYPE                0
NAME_GOODS_CATEGORY             0
NAME_PORTFOLIO                  0
NAME_PRODUCT_TYPE               0
CHANNEL_TYPE                    0
SELLERPLACE_AREA                0
NAME_SELLER_INDUSTRY            0
CNT_PAYMENT                     0
NAME_YIELD_GROUP                0
PRODUCT_COMBINATION             0
DAYS_FIRST_DRAWING              0
DAYS_FIRST_DUE                  0
DAYS_LAST_DUE_1ST_VERSION       0
DAYS_LAST_DUE                   0
DAYS_TERMINATION                0
NFLAG_INSURED_ON_APPROVAL       0
dtype: int64
In [185]:
mergeddf.head()
Out[185]:
TARGET NAME_CONTRACT_TYPE_x CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x NAME_TYPE_SUITE_x NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START_x HOUR_APPR_PROCESS_START_x REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR SK_ID_PREV NAME_CONTRACT_TYPE_y AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y WEEKDAY_APPR_PROCESS_START_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE_y NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL
0 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 1038818 Consumer loans 9251.775 179055.0 179055.0 179055.0 SATURDAY 9 Y 1 XAP Approved -606 XNA XAP Unaccompanied New Vehicles POS XNA Stone 500 Auto technology 24.0 low_normal POS other with interest 365243.0 -565.0 125.0 -25.0 -17.0 0.0
1 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1810518 Cash loans 98356.995 900000.0 1035882.0 900000.0 FRIDAY 12 Y 1 XNA Approved -746 XNA XAP Unaccompanied Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 low_normal Cash X-Sell: low 365243.0 -716.0 -386.0 -536.0 -527.0 1.0
2 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2636178 Consumer loans 64567.665 337500.0 348637.5 337500.0 SUNDAY 17 Y 1 XAP Approved -828 Cash through the bank XAP Family Refreshed Furniture POS XNA Stone 1400 Furniture 6.0 middle POS industry with interest 365243.0 -797.0 -647.0 -647.0 -639.0 0.0
3 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2396755 Consumer loans 6737.310 68809.5 68053.5 68809.5 SATURDAY 15 Y 1 XAP Approved -2341 Cash through the bank XAP Family Refreshed Consumer Electronics POS XNA Country-wide 200 Consumer electronics 12.0 middle POS household with interest 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 1.0
4 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1564014 Consumer loans 5357.250 24282.0 20106.0 24282.0 FRIDAY 5 Y 1 XAP Approved -815 Cash through the bank XAP Unaccompanied New Mobile POS XNA Regional / Local 30 Connectivity 4.0 middle POS mobile without interest 365243.0 -784.0 -694.0 -724.0 -714.0 0.0
In [186]:
# List of variables to map

varlist =  ['FLAG_OWN_CAR','FLAG_OWN_REALTY','FLAG_LAST_APPL_PER_CONTRACT']

# Defining the map function
def binary_map(x):
    return x.map({'Y': 1, "N": 0})

# Applying the function to the housing list
mergeddf[varlist] = mergeddf[varlist].apply(binary_map)
mergeddf.head()
Out[186]:
TARGET NAME_CONTRACT_TYPE_x CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x NAME_TYPE_SUITE_x NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START_x HOUR_APPR_PROCESS_START_x REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR SK_ID_PREV NAME_CONTRACT_TYPE_y AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y WEEKDAY_APPR_PROCESS_START_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE_y NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL
0 1 Cash loans M 0 1 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 1038818 Consumer loans 9251.775 179055.0 179055.0 179055.0 SATURDAY 9 1 1 XAP Approved -606 XNA XAP Unaccompanied New Vehicles POS XNA Stone 500 Auto technology 24.0 low_normal POS other with interest 365243.0 -565.0 125.0 -25.0 -17.0 0.0
1 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1810518 Cash loans 98356.995 900000.0 1035882.0 900000.0 FRIDAY 12 1 1 XNA Approved -746 XNA XAP Unaccompanied Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 low_normal Cash X-Sell: low 365243.0 -716.0 -386.0 -536.0 -527.0 1.0
2 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2636178 Consumer loans 64567.665 337500.0 348637.5 337500.0 SUNDAY 17 1 1 XAP Approved -828 Cash through the bank XAP Family Refreshed Furniture POS XNA Stone 1400 Furniture 6.0 middle POS industry with interest 365243.0 -797.0 -647.0 -647.0 -639.0 0.0
3 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 2396755 Consumer loans 6737.310 68809.5 68053.5 68809.5 SATURDAY 15 1 1 XAP Approved -2341 Cash through the bank XAP Family Refreshed Consumer Electronics POS XNA Country-wide 200 Consumer electronics 12.0 middle POS household with interest 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 1.0
4 0 Revolving loans M 1 1 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 1564014 Consumer loans 5357.250 24282.0 20106.0 24282.0 FRIDAY 5 1 1 XAP Approved -815 Cash through the bank XAP Unaccompanied New Mobile POS XNA Regional / Local 30 Connectivity 4.0 middle POS mobile without interest 365243.0 -784.0 -694.0 -724.0 -714.0 0.0
In [187]:
mergeddf.drop(['SK_ID_PREV'], 1, inplace = True)
mergeddf.head()
Out[187]:
TARGET NAME_CONTRACT_TYPE_x CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x NAME_TYPE_SUITE_x NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START_x HOUR_APPR_PROCESS_START_x REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR NAME_CONTRACT_TYPE_y AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y WEEKDAY_APPR_PROCESS_START_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE_y NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL
0 1 Cash loans M 0 1 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 Consumer loans 9251.775 179055.0 179055.0 179055.0 SATURDAY 9 1 1 XAP Approved -606 XNA XAP Unaccompanied New Vehicles POS XNA Stone 500 Auto technology 24.0 low_normal POS other with interest 365243.0 -565.0 125.0 -25.0 -17.0 0.0
1 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Cash loans 98356.995 900000.0 1035882.0 900000.0 FRIDAY 12 1 1 XNA Approved -746 XNA XAP Unaccompanied Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 low_normal Cash X-Sell: low 365243.0 -716.0 -386.0 -536.0 -527.0 1.0
2 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Consumer loans 64567.665 337500.0 348637.5 337500.0 SUNDAY 17 1 1 XAP Approved -828 Cash through the bank XAP Family Refreshed Furniture POS XNA Stone 1400 Furniture 6.0 middle POS industry with interest 365243.0 -797.0 -647.0 -647.0 -639.0 0.0
3 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Consumer loans 6737.310 68809.5 68053.5 68809.5 SATURDAY 15 1 1 XAP Approved -2341 Cash through the bank XAP Family Refreshed Consumer Electronics POS XNA Country-wide 200 Consumer electronics 12.0 middle POS household with interest 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 1.0
4 0 Revolving loans M 1 1 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Consumer loans 5357.250 24282.0 20106.0 24282.0 FRIDAY 5 1 1 XAP Approved -815 Cash through the bank XAP Unaccompanied New Mobile POS XNA Regional / Local 30 Connectivity 4.0 middle POS mobile without interest 365243.0 -784.0 -694.0 -724.0 -714.0 0.0
In [188]:
mergeddf[['FLAG_OWN_CAR','FLAG_OWN_REALTY','FLAG_MOBIL',
 'FLAG_EMP_PHONE','FLAG_WORK_PHONE','FLAG_CONT_MOBILE',
 'FLAG_PHONE','FLAG_EMAIL','REGION_RATING_CLIENT',
 'REGION_RATING_CLIENT_W_CITY','REG_REGION_NOT_LIVE_REGION',
 'REG_REGION_NOT_WORK_REGION','LIVE_REGION_NOT_WORK_REGION',
 'REG_CITY_NOT_LIVE_CITY','REG_CITY_NOT_WORK_CITY','FLAG_DOCUMENT_2',
 'FLAG_DOCUMENT_3','FLAG_DOCUMENT_4','FLAG_DOCUMENT_5','FLAG_DOCUMENT_6',
 'FLAG_DOCUMENT_7','FLAG_DOCUMENT_8','FLAG_DOCUMENT_9',
 'FLAG_DOCUMENT_10','FLAG_DOCUMENT_11','FLAG_DOCUMENT_12','FLAG_DOCUMENT_13',
 'FLAG_DOCUMENT_14','FLAG_DOCUMENT_15','FLAG_DOCUMENT_16','FLAG_DOCUMENT_17',
 'FLAG_DOCUMENT_18','FLAG_DOCUMENT_19','FLAG_DOCUMENT_20','FLAG_DOCUMENT_21',
 'NFLAG_INSURED_ON_APPROVAL']]= mergeddf[['FLAG_OWN_CAR','FLAG_OWN_REALTY','FLAG_MOBIL',
                                        'FLAG_EMP_PHONE','FLAG_WORK_PHONE','FLAG_CONT_MOBILE',
                                        'FLAG_PHONE','FLAG_EMAIL','REGION_RATING_CLIENT',
                                        'REGION_RATING_CLIENT_W_CITY','REG_REGION_NOT_LIVE_REGION',
                                        'REG_REGION_NOT_WORK_REGION','LIVE_REGION_NOT_WORK_REGION',
                                        'REG_CITY_NOT_LIVE_CITY','REG_CITY_NOT_WORK_CITY','FLAG_DOCUMENT_2',
                                        'FLAG_DOCUMENT_3','FLAG_DOCUMENT_4','FLAG_DOCUMENT_5','FLAG_DOCUMENT_6',
                                        'FLAG_DOCUMENT_7','FLAG_DOCUMENT_8','FLAG_DOCUMENT_9',
                                        'FLAG_DOCUMENT_10','FLAG_DOCUMENT_11','FLAG_DOCUMENT_12','FLAG_DOCUMENT_13',
                                        'FLAG_DOCUMENT_14','FLAG_DOCUMENT_15','FLAG_DOCUMENT_16','FLAG_DOCUMENT_17',
                                        'FLAG_DOCUMENT_18','FLAG_DOCUMENT_19','FLAG_DOCUMENT_20','FLAG_DOCUMENT_21',
                                        'NFLAG_INSURED_ON_APPROVAL']].astype('category')
In [189]:
obj_dtypes = [i for i in mergeddf.select_dtypes(include=object).columns if i not in ["type"] ]
num_dtypes = [i for i in mergeddf.select_dtypes(include = np.number).columns if i not in [ 'TARGET']]
In [190]:
num_dtypes
Out[190]:
['CNT_CHILDREN',
 'AMT_INCOME_TOTAL',
 'AMT_CREDIT_x',
 'AMT_ANNUITY_x',
 'AMT_GOODS_PRICE_x',
 'REGION_POPULATION_RELATIVE',
 'DAYS_BIRTH',
 'DAYS_EMPLOYED',
 'DAYS_REGISTRATION',
 'DAYS_ID_PUBLISH',
 'CNT_FAM_MEMBERS',
 'HOUR_APPR_PROCESS_START_x',
 'LIVE_CITY_NOT_WORK_CITY',
 'OBS_30_CNT_SOCIAL_CIRCLE',
 'DEF_30_CNT_SOCIAL_CIRCLE',
 'OBS_60_CNT_SOCIAL_CIRCLE',
 'DEF_60_CNT_SOCIAL_CIRCLE',
 'DAYS_LAST_PHONE_CHANGE',
 'AMT_REQ_CREDIT_BUREAU_HOUR',
 'AMT_REQ_CREDIT_BUREAU_DAY',
 'AMT_REQ_CREDIT_BUREAU_WEEK',
 'AMT_REQ_CREDIT_BUREAU_MON',
 'AMT_REQ_CREDIT_BUREAU_QRT',
 'AMT_REQ_CREDIT_BUREAU_YEAR',
 'AMT_ANNUITY_y',
 'AMT_APPLICATION',
 'AMT_CREDIT_y',
 'AMT_GOODS_PRICE_y',
 'HOUR_APPR_PROCESS_START_y',
 'FLAG_LAST_APPL_PER_CONTRACT',
 'NFLAG_LAST_APPL_IN_DAY',
 'DAYS_DECISION',
 'SELLERPLACE_AREA',
 'CNT_PAYMENT',
 'DAYS_FIRST_DRAWING',
 'DAYS_FIRST_DUE',
 'DAYS_LAST_DUE_1ST_VERSION',
 'DAYS_LAST_DUE',
 'DAYS_TERMINATION']
In [191]:
obj_dtypes
Out[191]:
['NAME_CONTRACT_TYPE_x',
 'CODE_GENDER',
 'NAME_TYPE_SUITE_x',
 'NAME_INCOME_TYPE',
 'NAME_EDUCATION_TYPE',
 'NAME_FAMILY_STATUS',
 'NAME_HOUSING_TYPE',
 'WEEKDAY_APPR_PROCESS_START_x',
 'ORGANIZATION_TYPE',
 'NAME_CONTRACT_TYPE_y',
 'WEEKDAY_APPR_PROCESS_START_y',
 'NAME_CASH_LOAN_PURPOSE',
 'NAME_CONTRACT_STATUS',
 'NAME_PAYMENT_TYPE',
 'CODE_REJECT_REASON',
 'NAME_TYPE_SUITE_y',
 'NAME_CLIENT_TYPE',
 'NAME_GOODS_CATEGORY',
 'NAME_PORTFOLIO',
 'NAME_PRODUCT_TYPE',
 'CHANNEL_TYPE',
 'NAME_SELLER_INDUSTRY',
 'NAME_YIELD_GROUP',
 'PRODUCT_COMBINATION']
In [192]:
# Creating a dummy variable for some of the categorical variables and dropping the first one.
dummy1 = pd.get_dummies(mergeddf[['FLAG_OWN_CAR','FLAG_OWN_REALTY','FLAG_MOBIL',
                                  'FLAG_EMP_PHONE','FLAG_WORK_PHONE','FLAG_CONT_MOBILE',
                                  'FLAG_PHONE','FLAG_EMAIL','REGION_RATING_CLIENT',
                                  'REGION_RATING_CLIENT_W_CITY','REG_REGION_NOT_LIVE_REGION',
                                  'REG_REGION_NOT_WORK_REGION','LIVE_REGION_NOT_WORK_REGION',
                                  'REG_CITY_NOT_LIVE_CITY','REG_CITY_NOT_WORK_CITY','FLAG_DOCUMENT_2',
                                  'FLAG_DOCUMENT_3','FLAG_DOCUMENT_4','FLAG_DOCUMENT_5','FLAG_DOCUMENT_6',
                                  'FLAG_DOCUMENT_7','FLAG_DOCUMENT_8','FLAG_DOCUMENT_9',
                                  'FLAG_DOCUMENT_10','FLAG_DOCUMENT_11','FLAG_DOCUMENT_12','FLAG_DOCUMENT_13',
                                  'FLAG_DOCUMENT_14','FLAG_DOCUMENT_15','FLAG_DOCUMENT_16','FLAG_DOCUMENT_17',
                                  'FLAG_DOCUMENT_18','FLAG_DOCUMENT_19','FLAG_DOCUMENT_20','FLAG_DOCUMENT_21',
                                  'NFLAG_INSURED_ON_APPROVAL','NAME_CONTRACT_TYPE_x','CODE_GENDER',
                                  'NAME_TYPE_SUITE_x','NAME_INCOME_TYPE','NAME_EDUCATION_TYPE','NAME_FAMILY_STATUS',
                                  'NAME_HOUSING_TYPE','WEEKDAY_APPR_PROCESS_START_x','ORGANIZATION_TYPE','NAME_CONTRACT_TYPE_y',
                                  'WEEKDAY_APPR_PROCESS_START_y','NAME_CASH_LOAN_PURPOSE','NAME_CONTRACT_STATUS','NAME_PAYMENT_TYPE',
                                  'CODE_REJECT_REASON','NAME_TYPE_SUITE_y','NAME_CLIENT_TYPE','NAME_GOODS_CATEGORY',
                                  'NAME_PORTFOLIO','NAME_PRODUCT_TYPE','CHANNEL_TYPE','NAME_SELLER_INDUSTRY',
                                  'NAME_YIELD_GROUP','PRODUCT_COMBINATION']], drop_first=True)
dummy1.head()
Out[192]:
FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
0 0 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
2 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
3 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
4 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
In [193]:
# Adding the results to the master dataframe
mergeddf = pd.concat([mergeddf, dummy1], axis=1)
mergeddf.head()
Out[193]:
TARGET NAME_CONTRACT_TYPE_x CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x NAME_TYPE_SUITE_x NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START_x HOUR_APPR_PROCESS_START_x REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR NAME_CONTRACT_TYPE_y AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y WEEKDAY_APPR_PROCESS_START_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY NAME_CASH_LOAN_PURPOSE NAME_CONTRACT_STATUS DAYS_DECISION NAME_PAYMENT_TYPE CODE_REJECT_REASON NAME_TYPE_SUITE_y NAME_CLIENT_TYPE NAME_GOODS_CATEGORY NAME_PORTFOLIO NAME_PRODUCT_TYPE CHANNEL_TYPE SELLERPLACE_AREA NAME_SELLER_INDUSTRY CNT_PAYMENT NAME_YIELD_GROUP PRODUCT_COMBINATION DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION NFLAG_INSURED_ON_APPROVAL FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
0 1 Cash loans M 0 1 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 9461 -637 -3648.0 2120 1 1 0 1 1 0 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 2.0 2.0 2.0 2.0 1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 Consumer loans 9251.775 179055.0 179055.0 179055.0 SATURDAY 9 1 1 XAP Approved -606 XNA XAP Unaccompanied New Vehicles POS XNA Stone 500 Auto technology 24.0 low_normal POS other with interest 365243.0 -565.0 125.0 -25.0 -17.0 0.0 0 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
1 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Cash loans 98356.995 900000.0 1035882.0 900000.0 FRIDAY 12 1 1 XNA Approved -746 XNA XAP Unaccompanied Repeater XNA Cash x-sell Credit and cash offices -1 XNA 12.0 low_normal Cash X-Sell: low 365243.0 -716.0 -386.0 -536.0 -527.0 1.0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
2 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Consumer loans 64567.665 337500.0 348637.5 337500.0 SUNDAY 17 1 1 XAP Approved -828 Cash through the bank XAP Family Refreshed Furniture POS XNA Stone 1400 Furniture 6.0 middle POS industry with interest 365243.0 -797.0 -647.0 -647.0 -639.0 0.0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
3 0 Cash loans F 0 0 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 16765 -1188 -1186.0 291 1 1 0 1 1 0 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 1.0 0.0 1.0 0.0 828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Consumer loans 6737.310 68809.5 68053.5 68809.5 SATURDAY 15 1 1 XAP Approved -2341 Cash through the bank XAP Family Refreshed Consumer Electronics POS XNA Country-wide 200 Consumer electronics 12.0 middle POS household with interest 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 1.0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
4 0 Revolving loans M 1 1 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 19046 -225 -4260.0 2531 1 1 1 1 1 0 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government 0.0 0.0 0.0 0.0 815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 Consumer loans 5357.250 24282.0 20106.0 24282.0 FRIDAY 5 1 1 XAP Approved -815 Cash through the bank XAP Unaccompanied New Mobile POS XNA Regional / Local 30 Connectivity 4.0 middle POS mobile without interest 365243.0 -784.0 -694.0 -724.0 -714.0 0.0 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
In [194]:
mergeddf = mergeddf.drop(['FLAG_OWN_CAR','FLAG_OWN_REALTY','FLAG_MOBIL',
                          'FLAG_EMP_PHONE','FLAG_WORK_PHONE','FLAG_CONT_MOBILE',
                          'FLAG_PHONE','FLAG_EMAIL','REGION_RATING_CLIENT',
                          'REGION_RATING_CLIENT_W_CITY','REG_REGION_NOT_LIVE_REGION',
                          'REG_REGION_NOT_WORK_REGION','LIVE_REGION_NOT_WORK_REGION',
                          'REG_CITY_NOT_LIVE_CITY','REG_CITY_NOT_WORK_CITY','FLAG_DOCUMENT_2',
                          'FLAG_DOCUMENT_3','FLAG_DOCUMENT_4','FLAG_DOCUMENT_5','FLAG_DOCUMENT_6',
                          'FLAG_DOCUMENT_7','FLAG_DOCUMENT_8','FLAG_DOCUMENT_9',
                          'FLAG_DOCUMENT_10','FLAG_DOCUMENT_11','FLAG_DOCUMENT_12','FLAG_DOCUMENT_13',
                          'FLAG_DOCUMENT_14','FLAG_DOCUMENT_15','FLAG_DOCUMENT_16','FLAG_DOCUMENT_17',
                          'FLAG_DOCUMENT_18','FLAG_DOCUMENT_19','FLAG_DOCUMENT_20','FLAG_DOCUMENT_21',
                          'NFLAG_INSURED_ON_APPROVAL','NAME_CONTRACT_TYPE_x','CODE_GENDER',
                          'NAME_TYPE_SUITE_x','NAME_INCOME_TYPE','NAME_EDUCATION_TYPE','NAME_FAMILY_STATUS',
                          'NAME_HOUSING_TYPE','WEEKDAY_APPR_PROCESS_START_x','ORGANIZATION_TYPE','NAME_CONTRACT_TYPE_y',
                          'WEEKDAY_APPR_PROCESS_START_y','NAME_CASH_LOAN_PURPOSE','NAME_CONTRACT_STATUS','NAME_PAYMENT_TYPE',
                          'CODE_REJECT_REASON','NAME_TYPE_SUITE_y','NAME_CLIENT_TYPE','NAME_GOODS_CATEGORY',
                          'NAME_PORTFOLIO','NAME_PRODUCT_TYPE','CHANNEL_TYPE','NAME_SELLER_INDUSTRY',
                          'NAME_YIELD_GROUP','PRODUCT_COMBINATION'], axis = 1)
mergeddf.head()
Out[194]:
TARGET CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH CNT_FAM_MEMBERS HOUR_APPR_PROCESS_START_x LIVE_CITY_NOT_WORK_CITY OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY DAYS_DECISION SELLERPLACE_AREA CNT_PAYMENT DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
0 1 0 202500.0 406597.5 24700.5 351000.0 0.018801 9461 -637 -3648.0 2120 1.0 10 0 2.0 2.0 2.0 2.0 1134.0 0.0 0.0 0.0 0.0 0.0 1.0 9251.775 179055.0 179055.0 179055.0 9 1 1 -606 500 24.0 365243.0 -565.0 125.0 -25.0 -17.0 0 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
1 0 0 270000.0 1293502.5 35698.5 1129500.0 0.003541 16765 -1188 -1186.0 291 2.0 11 0 1.0 0.0 1.0 0.0 828.0 0.0 0.0 0.0 0.0 0.0 0.0 98356.995 900000.0 1035882.0 900000.0 12 1 1 -746 -1 12.0 365243.0 -716.0 -386.0 -536.0 -527.0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
2 0 0 270000.0 1293502.5 35698.5 1129500.0 0.003541 16765 -1188 -1186.0 291 2.0 11 0 1.0 0.0 1.0 0.0 828.0 0.0 0.0 0.0 0.0 0.0 0.0 64567.665 337500.0 348637.5 337500.0 17 1 1 -828 1400 6.0 365243.0 -797.0 -647.0 -647.0 -639.0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
3 0 0 270000.0 1293502.5 35698.5 1129500.0 0.003541 16765 -1188 -1186.0 291 2.0 11 0 1.0 0.0 1.0 0.0 828.0 0.0 0.0 0.0 0.0 0.0 0.0 6737.310 68809.5 68053.5 68809.5 15 1 1 -2341 200 12.0 365243.0 -2310.0 -1980.0 -1980.0 -1976.0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
4 0 0 67500.0 135000.0 6750.0 135000.0 0.010032 19046 -225 -4260.0 2531 1.0 9 0 0.0 0.0 0.0 0.0 815.0 0.0 0.0 0.0 0.0 0.0 0.0 5357.250 24282.0 20106.0 24282.0 5 1 1 -815 30 4.0 365243.0 -784.0 -694.0 -724.0 -714.0 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
In [195]:
mergeddf.shape
Out[195]:
(1406625, 292)
In [196]:
mergeddfs=mergeddf.sample(n = 7000) 
In [197]:
from sklearn.model_selection import train_test_split

# Putting feature variable to X
X = mergeddfs.drop(['TARGET'], axis=1)
In [198]:
X.head()
Out[198]:
CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH CNT_FAM_MEMBERS HOUR_APPR_PROCESS_START_x LIVE_CITY_NOT_WORK_CITY OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY DAYS_DECISION SELLERPLACE_AREA CNT_PAYMENT DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
620985 2 87750.0 314100.0 16573.5 225000.0 0.018634 10856 -1455 -2167.0 3394 4.0 13 1 0.0 0.0 0.0 0.0 1202.0 0.0 0.0 0.0 0.0 0.0 4.0 10180.215000 90000.0 95940.0 90000.000000 11 1 1 -146 53 12.0 365243.0 -116.0 214.0 365243.0 365243.0 0 1 1 1 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
1030348 0 90000.0 135000.0 6750.0 135000.0 0.010966 17953 -631 -11317.0 1302 2.0 11 1 0.0 0.0 0.0 0.0 178.0 0.0 0.0 0.0 0.0 0.0 2.0 15837.184952 0.0 0.0 226451.191283 12 1 1 -142 30 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 1 1 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
206555 1 225000.0 906615.0 30091.5 688500.0 0.032561 15346 -5040 -5071.0 1121 3.0 14 0 1.0 0.0 1.0 0.0 120.0 0.0 0.0 0.0 0.0 3.0 1.0 15837.184952 0.0 0.0 226451.191283 13 1 1 -120 -1 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1257561 0 180000.0 495351.0 28566.0 459000.0 0.018634 19463 -11799 -6945.0 3001 2.0 10 0 2.0 0.0 2.0 0.0 3485.0 0.0 0.0 0.0 0.0 0.0 1.0 15837.184952 0.0 0.0 226451.191283 14 1 1 -7 -1 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1283370 0 135000.0 835605.0 24561.0 697500.0 0.019689 16011 -951 -5757.0 4664 2.0 11 0 0.0 0.0 0.0 0.0 1932.0 0.0 0.0 0.0 0.0 0.0 0.0 5110.020000 38916.0 26928.0 38916.000000 9 1 1 -259 1000 6.0 365243.0 -228.0 -78.0 -78.0 -76.0 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
In [199]:
X.shape
Out[199]:
(7000, 291)
In [200]:
y = mergeddfs['TARGET']
In [201]:
y.head()
Out[201]:
620985     0
1030348    1
206555     0
1257561    0
1283370    0
Name: TARGET, dtype: int64
In [202]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=70)
In [203]:
X_train.head()
Out[203]:
CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH CNT_FAM_MEMBERS HOUR_APPR_PROCESS_START_x LIVE_CITY_NOT_WORK_CITY OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY DAYS_DECISION SELLERPLACE_AREA CNT_PAYMENT DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
886912 0 225000.0 540000.0 30280.5 540000.0 0.035792 16136 -4914 -8152.0 4305 2.0 19 0 0.0 0.0 0.0 0.0 611.0 0.0 0.0 1.0 1.0 0.0 7.0 15837.184952 0.0 0.0 226451.191283 14 1 1 -254 -1 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1345274 0 225000.0 387000.0 14719.5 387000.0 0.018209 21455 -3406 -12828.0 4196 2.0 15 0 4.0 0.0 4.0 0.0 1067.0 0.0 0.0 0.0 0.0 0.0 3.0 15837.184952 0.0 0.0 226451.191283 9 1 1 -27 -1 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
710187 0 202500.0 157500.0 7875.0 157500.0 0.019101 20914 -1229 -1980.0 1985 2.0 17 0 0.0 0.0 0.0 0.0 985.0 0.0 0.0 0.0 0.0 0.0 0.0 15837.184952 0.0 0.0 226451.191283 11 1 1 -19 -1 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 1 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
969891 0 112500.0 450000.0 24543.0 450000.0 0.028663 20557 365243 -23.0 3445 1.0 14 0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9131.760000 70267.5 68454.0 70267.500000 14 1 1 -2678 80 10.0 365243.0 -2647.0 -2377.0 -2377.0 -2370.0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
1169924 0 225000.0 1408806.0 92457.0 1350000.0 0.031329 18667 -564 -5628.0 2207 2.0 10 0 0.0 0.0 0.0 0.0 2782.0 0.0 0.0 0.0 0.0 0.0 0.0 6621.795000 47533.5 23242.5 47533.500000 12 1 1 -2782 30 4.0 365243.0 -2751.0 -2661.0 -2661.0 -2609.0 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
In [204]:
X_train.shape
Out[204]:
(4900, 291)
In [205]:
X_test.head()
Out[205]:
CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH CNT_FAM_MEMBERS HOUR_APPR_PROCESS_START_x LIVE_CITY_NOT_WORK_CITY OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY DAYS_DECISION SELLERPLACE_AREA CNT_PAYMENT DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
563940 0 202500.0 1024740.0 52452.0 900000.0 0.007114 19631 365243 -174.0 3177 2.0 13 0 2.0 0.0 2.0 0.0 159.0 0.0 0.0 0.0 0.0 0.0 8.0 15837.184952 0.0 0.0 226451.191283 12 1 1 -169 -1 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
186208 0 112500.0 468733.5 21982.5 387000.0 0.022800 19606 -336 -3935.0 3085 2.0 9 1 1.0 1.0 1.0 1.0 1815.0 0.0 0.0 0.0 0.0 0.0 1.0 15837.184952 133065.0 133065.0 133065.000000 9 1 1 -1815 1882 12.0 365243.0 -825.0 -358.0 -534.0 -494.0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
739774 0 270000.0 566055.0 18387.0 472500.0 0.018801 21797 -6903 -6437.0 4015 2.0 14 0 0.0 0.0 0.0 0.0 1495.0 0.0 0.0 0.0 0.0 2.0 1.0 13481.955000 174411.0 174411.0 174411.000000 17 1 1 -734 15 14.0 365243.0 -700.0 -310.0 -490.0 -484.0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
767186 0 90000.0 175500.0 15192.0 175500.0 0.014520 21014 365243 -3963.0 3963 2.0 9 0 1.0 0.0 1.0 0.0 601.0 0.0 0.0 0.0 0.0 1.0 4.0 17607.240000 121941.0 95215.5 121941.000000 13 1 1 -601 2500 6.0 365243.0 -570.0 -420.0 -450.0 -441.0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
502320 1 157500.0 492862.5 24102.0 346500.0 0.018801 13246 -1565 -7112.0 4909 3.0 6 1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 3.0 8335.980000 229500.0 265477.5 229500.000000 7 1 1 -252 -1 48.0 365243.0 -222.0 1188.0 365243.0 365243.0 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
In [206]:
X_test.shape
Out[206]:
(2100, 291)
In [207]:
y_train.head()
Out[207]:
886912     0
1345274    0
710187     0
969891     0
1169924    0
Name: TARGET, dtype: int64
In [208]:
y_train.shape
Out[208]:
(4900,)
In [209]:
y_test.head()
Out[209]:
563940    0
186208    0
739774    0
767186    0
502320    0
Name: TARGET, dtype: int64
In [210]:
y_test.shape
Out[210]:
(2100,)
In [211]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train[['CNT_CHILDREN','AMT_INCOME_TOTAL','AMT_CREDIT_x','AMT_ANNUITY_x',
         'AMT_GOODS_PRICE_x','REGION_POPULATION_RELATIVE','DAYS_BIRTH',
         'DAYS_EMPLOYED','DAYS_REGISTRATION','DAYS_ID_PUBLISH','CNT_FAM_MEMBERS',
         'HOUR_APPR_PROCESS_START_x','LIVE_CITY_NOT_WORK_CITY','OBS_30_CNT_SOCIAL_CIRCLE',
         'DEF_30_CNT_SOCIAL_CIRCLE','OBS_60_CNT_SOCIAL_CIRCLE','DEF_60_CNT_SOCIAL_CIRCLE',
         'DAYS_LAST_PHONE_CHANGE','AMT_REQ_CREDIT_BUREAU_HOUR','AMT_REQ_CREDIT_BUREAU_DAY',
         'AMT_REQ_CREDIT_BUREAU_WEEK','AMT_REQ_CREDIT_BUREAU_MON','AMT_REQ_CREDIT_BUREAU_QRT',
         'AMT_REQ_CREDIT_BUREAU_YEAR','AMT_ANNUITY_y','AMT_APPLICATION','AMT_CREDIT_y',
         'AMT_GOODS_PRICE_y','HOUR_APPR_PROCESS_START_y','FLAG_LAST_APPL_PER_CONTRACT',
         'NFLAG_LAST_APPL_IN_DAY','DAYS_DECISION','SELLERPLACE_AREA','CNT_PAYMENT',
         'DAYS_FIRST_DRAWING','DAYS_FIRST_DUE','DAYS_LAST_DUE_1ST_VERSION','DAYS_LAST_DUE',
         'DAYS_TERMINATION']] = scaler.fit_transform(X_train[['CNT_CHILDREN','AMT_INCOME_TOTAL','AMT_CREDIT_x',
                                                               'AMT_ANNUITY_x','AMT_GOODS_PRICE_x','REGION_POPULATION_RELATIVE',
                                                               'DAYS_BIRTH','DAYS_EMPLOYED','DAYS_REGISTRATION','DAYS_ID_PUBLISH',
                                                               'CNT_FAM_MEMBERS','HOUR_APPR_PROCESS_START_x','LIVE_CITY_NOT_WORK_CITY',
                                                               'OBS_30_CNT_SOCIAL_CIRCLE','DEF_30_CNT_SOCIAL_CIRCLE','OBS_60_CNT_SOCIAL_CIRCLE',
                                                               'DEF_60_CNT_SOCIAL_CIRCLE','DAYS_LAST_PHONE_CHANGE','AMT_REQ_CREDIT_BUREAU_HOUR',
                                                               'AMT_REQ_CREDIT_BUREAU_DAY','AMT_REQ_CREDIT_BUREAU_WEEK','AMT_REQ_CREDIT_BUREAU_MON',
                                                               'AMT_REQ_CREDIT_BUREAU_QRT','AMT_REQ_CREDIT_BUREAU_YEAR','AMT_ANNUITY_y','AMT_APPLICATION',
                                                               'AMT_CREDIT_y','AMT_GOODS_PRICE_y','HOUR_APPR_PROCESS_START_y','FLAG_LAST_APPL_PER_CONTRACT',
                                                               'NFLAG_LAST_APPL_IN_DAY','DAYS_DECISION','SELLERPLACE_AREA','CNT_PAYMENT','DAYS_FIRST_DRAWING',
                                                               'DAYS_FIRST_DUE','DAYS_LAST_DUE_1ST_VERSION','DAYS_LAST_DUE','DAYS_TERMINATION']])

X_train.head()
Out[211]:
CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH CNT_FAM_MEMBERS HOUR_APPR_PROCESS_START_x LIVE_CITY_NOT_WORK_CITY OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY DAYS_DECISION SELLERPLACE_AREA CNT_PAYMENT DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
886912 -0.565295 0.602840 -0.109029 0.237550 0.049634 1.195677 -0.040396 -0.515113 -0.903322 0.827966 -0.164389 2.189600 -0.46203 -0.622475 -0.333640 -0.621366 -0.283858 -0.580375 -0.072306 -0.064286 5.608317 0.774544 -0.45678 1.873421 0.006095 -0.598698 -0.616825 0.008398 0.465356 0.068673 0.059004 0.803143 -0.206051 -0.232557 0.206821 -0.161489 -0.250496 -0.385506 -0.402045 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1345274 -0.565295 0.602840 -0.504282 -0.882596 -0.382168 -0.168180 1.182491 -0.504841 -2.227636 0.755375 -0.164389 0.948029 -0.46203 1.002743 -0.333640 1.016886 -0.283858 -0.010246 -0.072306 -0.064286 -0.168591 -0.284108 -0.45678 0.160139 0.006095 -0.598698 -0.616825 0.008398 -1.047544 0.068673 0.059004 1.089453 -0.206051 -0.232557 0.206821 -0.161489 -0.250496 -0.385506 -0.402045 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
710187 -0.565295 0.347182 -1.097161 -1.375291 -1.029872 -0.098990 1.058110 -0.490013 0.844681 -0.717089 -0.164389 1.568814 -0.46203 -0.622475 -0.333640 -0.621366 -0.283858 -0.112770 -0.072306 -0.064286 -0.168591 -0.284108 -0.45678 -1.124822 0.006095 -0.598698 -0.616825 0.008398 -0.442384 0.068673 0.059004 1.099543 -0.206051 -0.232557 0.206821 -0.161489 -0.250496 -0.385506 -0.402045 1 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
969891 -0.565295 -0.675449 -0.341531 -0.175459 -0.204367 0.642704 0.976033 2.006208 1.398933 0.255230 -1.292547 0.637636 -0.46203 -0.216171 1.789523 -0.211803 -0.283858 -1.344297 -0.072306 -0.064286 -0.168591 -0.284108 -0.45678 -1.124822 -0.516710 -0.355134 -0.398309 -0.563952 0.465356 0.068673 0.059004 -2.254190 -0.147522 -0.388368 0.206821 -0.192560 -0.273737 -0.400448 -0.416799 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
1169924 -0.565295 0.602840 2.135401 4.713275 2.335648 0.849497 0.541504 -0.485483 -0.188488 -0.569243 -0.164389 -0.603936 -0.46203 -0.622475 -0.333640 -0.621366 -0.283858 2.133986 -0.072306 -0.064286 -0.168591 -0.284108 -0.45678 -1.124822 -0.712406 -0.433935 -0.542631 -0.647263 -0.139804 0.068673 0.059004 -2.385363 -0.183651 -0.855803 0.206821 -0.194333 -0.277006 -0.402750 -0.418678 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
In [212]:
X_test[['CNT_CHILDREN','AMT_INCOME_TOTAL','AMT_CREDIT_x','AMT_ANNUITY_x',
         'AMT_GOODS_PRICE_x','REGION_POPULATION_RELATIVE','DAYS_BIRTH',
         'DAYS_EMPLOYED','DAYS_REGISTRATION','DAYS_ID_PUBLISH','CNT_FAM_MEMBERS',
         'HOUR_APPR_PROCESS_START_x','LIVE_CITY_NOT_WORK_CITY','OBS_30_CNT_SOCIAL_CIRCLE',
         'DEF_30_CNT_SOCIAL_CIRCLE','OBS_60_CNT_SOCIAL_CIRCLE','DEF_60_CNT_SOCIAL_CIRCLE',
         'DAYS_LAST_PHONE_CHANGE','AMT_REQ_CREDIT_BUREAU_HOUR','AMT_REQ_CREDIT_BUREAU_DAY',
         'AMT_REQ_CREDIT_BUREAU_WEEK','AMT_REQ_CREDIT_BUREAU_MON','AMT_REQ_CREDIT_BUREAU_QRT',
         'AMT_REQ_CREDIT_BUREAU_YEAR','AMT_ANNUITY_y','AMT_APPLICATION','AMT_CREDIT_y',
         'AMT_GOODS_PRICE_y','HOUR_APPR_PROCESS_START_y','FLAG_LAST_APPL_PER_CONTRACT',
         'NFLAG_LAST_APPL_IN_DAY','DAYS_DECISION','SELLERPLACE_AREA','CNT_PAYMENT',
         'DAYS_FIRST_DRAWING','DAYS_FIRST_DUE','DAYS_LAST_DUE_1ST_VERSION','DAYS_LAST_DUE',
         'DAYS_TERMINATION']] = scaler.transform(X_test[['CNT_CHILDREN','AMT_INCOME_TOTAL','AMT_CREDIT_x',
                                                               'AMT_ANNUITY_x','AMT_GOODS_PRICE_x','REGION_POPULATION_RELATIVE',
                                                               'DAYS_BIRTH','DAYS_EMPLOYED','DAYS_REGISTRATION','DAYS_ID_PUBLISH',
                                                               'CNT_FAM_MEMBERS','HOUR_APPR_PROCESS_START_x','LIVE_CITY_NOT_WORK_CITY',
                                                               'OBS_30_CNT_SOCIAL_CIRCLE','DEF_30_CNT_SOCIAL_CIRCLE','OBS_60_CNT_SOCIAL_CIRCLE',
                                                               'DEF_60_CNT_SOCIAL_CIRCLE','DAYS_LAST_PHONE_CHANGE','AMT_REQ_CREDIT_BUREAU_HOUR',
                                                               'AMT_REQ_CREDIT_BUREAU_DAY','AMT_REQ_CREDIT_BUREAU_WEEK','AMT_REQ_CREDIT_BUREAU_MON',
                                                               'AMT_REQ_CREDIT_BUREAU_QRT','AMT_REQ_CREDIT_BUREAU_YEAR','AMT_ANNUITY_y','AMT_APPLICATION',
                                                               'AMT_CREDIT_y','AMT_GOODS_PRICE_y','HOUR_APPR_PROCESS_START_y','FLAG_LAST_APPL_PER_CONTRACT',
                                                               'NFLAG_LAST_APPL_IN_DAY','DAYS_DECISION','SELLERPLACE_AREA','CNT_PAYMENT','DAYS_FIRST_DRAWING',
                                                               'DAYS_FIRST_DUE','DAYS_LAST_DUE_1ST_VERSION','DAYS_LAST_DUE','DAYS_TERMINATION']])

X_test.head()
Out[212]:
CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH CNT_FAM_MEMBERS HOUR_APPR_PROCESS_START_x LIVE_CITY_NOT_WORK_CITY OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY DAYS_DECISION SELLERPLACE_AREA CNT_PAYMENT DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
563940 -0.565295 0.347182 1.143224 1.833548 1.065640 -1.028783 0.763137 2.006208 1.356168 0.076750 -0.164389 0.327243 -0.46203 0.190134 -0.333640 0.197760 -0.283858 -1.145502 -0.072306 -0.064286 -0.168591 -0.284108 -0.456780 2.301741 0.006095 -0.598698 -0.616825 0.008398 -0.139804 0.068673 0.059004 0.910352 -0.206051 -0.232557 0.206821 -0.161489 -0.250496 -0.385506 -0.402045 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
186208 -0.565295 -0.675449 -0.293136 -0.359775 -0.382168 0.187930 0.757389 -0.483930 0.290996 0.015480 -0.164389 -0.914329 2.16436 -0.216171 1.789523 -0.211803 2.282384 0.924964 -0.072306 -0.064286 -0.168591 -0.284108 -0.456780 -0.696501 0.006095 -0.137463 -0.192062 -0.333824 -1.047544 0.068673 0.059004 -1.165709 1.154562 -0.232557 0.206821 -0.161489 -0.250496 -0.385506 -0.402045 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
739774 -0.565295 1.114155 -0.041720 -0.618594 -0.140867 -0.122260 1.261120 -0.528661 -0.417609 0.634834 -0.164389 0.637636 -0.46203 -0.622475 -0.333640 -0.621366 -0.283858 0.524874 -0.072306 -0.064286 -0.168591 -0.284108 2.394456 -0.696501 -0.177537 0.005852 -0.060079 -0.182308 1.373096 0.068673 0.059004 0.197731 -0.194489 -0.076745 0.206821 -0.159357 -0.249943 -0.385149 -0.401966 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
767186 -0.565295 -0.931107 -1.050661 -0.848583 -0.979072 -0.454323 1.081101 2.006208 0.283066 0.600204 -0.164389 -0.914329 -0.46203 -0.216171 -0.333640 -0.211803 -0.283858 -0.592878 -0.072306 -0.064286 -0.168591 -0.284108 0.968838 0.588460 0.144102 -0.176021 -0.312883 -0.374589 0.162776 0.068673 0.059004 0.365481 1.601115 -0.699992 0.206821 -0.157140 -0.251210 -0.384825 -0.401628 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
502320 0.846501 -0.164134 -0.230802 -0.207204 -0.496469 -0.122260 -0.704834 -0.492301 -0.608779 1.230213 0.963769 -1.845507 2.16436 -0.622475 -0.333640 -0.621366 -0.283858 -1.344297 -0.072306 -0.064286 -0.168591 0.774544 -0.456780 0.160139 -0.578755 0.196804 0.230619 0.019571 -1.652704 0.068673 0.059004 0.805666 -0.206051 2.572052 0.206821 -0.151206 -0.232700 2.579944 2.474262 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
In [213]:
X_test[['CNT_CHILDREN','AMT_INCOME_TOTAL','AMT_CREDIT_x','AMT_ANNUITY_x',
         'AMT_GOODS_PRICE_x','REGION_POPULATION_RELATIVE','DAYS_BIRTH',
         'DAYS_EMPLOYED','DAYS_REGISTRATION','DAYS_ID_PUBLISH','CNT_FAM_MEMBERS',
         'HOUR_APPR_PROCESS_START_x','LIVE_CITY_NOT_WORK_CITY','OBS_30_CNT_SOCIAL_CIRCLE',
         'DEF_30_CNT_SOCIAL_CIRCLE','OBS_60_CNT_SOCIAL_CIRCLE','DEF_60_CNT_SOCIAL_CIRCLE',
         'DAYS_LAST_PHONE_CHANGE','AMT_REQ_CREDIT_BUREAU_HOUR','AMT_REQ_CREDIT_BUREAU_DAY',
         'AMT_REQ_CREDIT_BUREAU_WEEK','AMT_REQ_CREDIT_BUREAU_MON','AMT_REQ_CREDIT_BUREAU_QRT',
         'AMT_REQ_CREDIT_BUREAU_YEAR','AMT_ANNUITY_y','AMT_APPLICATION','AMT_CREDIT_y',
         'AMT_GOODS_PRICE_y','HOUR_APPR_PROCESS_START_y','FLAG_LAST_APPL_PER_CONTRACT',
         'NFLAG_LAST_APPL_IN_DAY','DAYS_DECISION','SELLERPLACE_AREA','CNT_PAYMENT',
         'DAYS_FIRST_DRAWING','DAYS_FIRST_DUE','DAYS_LAST_DUE_1ST_VERSION','DAYS_LAST_DUE',
         'DAYS_TERMINATION']] = scaler.transform(X_test[['CNT_CHILDREN','AMT_INCOME_TOTAL','AMT_CREDIT_x',
                                                               'AMT_ANNUITY_x','AMT_GOODS_PRICE_x','REGION_POPULATION_RELATIVE',
                                                               'DAYS_BIRTH','DAYS_EMPLOYED','DAYS_REGISTRATION','DAYS_ID_PUBLISH',
                                                               'CNT_FAM_MEMBERS','HOUR_APPR_PROCESS_START_x','LIVE_CITY_NOT_WORK_CITY',
                                                               'OBS_30_CNT_SOCIAL_CIRCLE','DEF_30_CNT_SOCIAL_CIRCLE','OBS_60_CNT_SOCIAL_CIRCLE',
                                                               'DEF_60_CNT_SOCIAL_CIRCLE','DAYS_LAST_PHONE_CHANGE','AMT_REQ_CREDIT_BUREAU_HOUR',
                                                               'AMT_REQ_CREDIT_BUREAU_DAY','AMT_REQ_CREDIT_BUREAU_WEEK','AMT_REQ_CREDIT_BUREAU_MON',
                                                               'AMT_REQ_CREDIT_BUREAU_QRT','AMT_REQ_CREDIT_BUREAU_YEAR','AMT_ANNUITY_y','AMT_APPLICATION',
                                                               'AMT_CREDIT_y','AMT_GOODS_PRICE_y','HOUR_APPR_PROCESS_START_y','FLAG_LAST_APPL_PER_CONTRACT',
                                                               'NFLAG_LAST_APPL_IN_DAY','DAYS_DECISION','SELLERPLACE_AREA','CNT_PAYMENT','DAYS_FIRST_DRAWING',
                                                               'DAYS_FIRST_DUE','DAYS_LAST_DUE_1ST_VERSION','DAYS_LAST_DUE','DAYS_TERMINATION']])

X_test.head()
Out[213]:
CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT_x AMT_ANNUITY_x AMT_GOODS_PRICE_x REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH CNT_FAM_MEMBERS HOUR_APPR_PROCESS_START_x LIVE_CITY_NOT_WORK_CITY OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR AMT_ANNUITY_y AMT_APPLICATION AMT_CREDIT_y AMT_GOODS_PRICE_y HOUR_APPR_PROCESS_START_y FLAG_LAST_APPL_PER_CONTRACT NFLAG_LAST_APPL_IN_DAY DAYS_DECISION SELLERPLACE_AREA CNT_PAYMENT DAYS_FIRST_DRAWING DAYS_FIRST_DUE DAYS_LAST_DUE_1ST_VERSION DAYS_LAST_DUE DAYS_TERMINATION FLAG_OWN_CAR_1 FLAG_OWN_REALTY_1 FLAG_EMP_PHONE_1 FLAG_WORK_PHONE_1 FLAG_CONT_MOBILE_1 FLAG_PHONE_1 FLAG_EMAIL_1 REGION_RATING_CLIENT_2 REGION_RATING_CLIENT_3 REGION_RATING_CLIENT_W_CITY_2 REGION_RATING_CLIENT_W_CITY_3 REG_REGION_NOT_LIVE_REGION_1 REG_REGION_NOT_WORK_REGION_1 LIVE_REGION_NOT_WORK_REGION_1 REG_CITY_NOT_LIVE_CITY_1 REG_CITY_NOT_WORK_CITY_1 FLAG_DOCUMENT_2_1 FLAG_DOCUMENT_3_1 FLAG_DOCUMENT_4_1 FLAG_DOCUMENT_5_1 FLAG_DOCUMENT_6_1 FLAG_DOCUMENT_7_1 FLAG_DOCUMENT_8_1 FLAG_DOCUMENT_9_1 FLAG_DOCUMENT_10_1 FLAG_DOCUMENT_11_1 FLAG_DOCUMENT_12_1 FLAG_DOCUMENT_13_1 FLAG_DOCUMENT_14_1 FLAG_DOCUMENT_15_1 FLAG_DOCUMENT_16_1 FLAG_DOCUMENT_17_1 FLAG_DOCUMENT_18_1 FLAG_DOCUMENT_19_1 FLAG_DOCUMENT_20_1 FLAG_DOCUMENT_21_1 NFLAG_INSURED_ON_APPROVAL_1.0 NAME_CONTRACT_TYPE_x_Revolving loans CODE_GENDER_M CODE_GENDER_XNA NAME_TYPE_SUITE_x_Family NAME_TYPE_SUITE_x_Group of people NAME_TYPE_SUITE_x_Other_A NAME_TYPE_SUITE_x_Other_B NAME_TYPE_SUITE_x_Spouse, partner NAME_TYPE_SUITE_x_Unaccompanied NAME_INCOME_TYPE_Maternity leave NAME_INCOME_TYPE_Pensioner NAME_INCOME_TYPE_State servant NAME_INCOME_TYPE_Student NAME_INCOME_TYPE_Unemployed NAME_INCOME_TYPE_Working NAME_EDUCATION_TYPE_Higher education NAME_EDUCATION_TYPE_Incomplete higher NAME_EDUCATION_TYPE_Lower secondary NAME_EDUCATION_TYPE_Secondary / secondary special NAME_FAMILY_STATUS_Married NAME_FAMILY_STATUS_Separated NAME_FAMILY_STATUS_Single / not married NAME_FAMILY_STATUS_Widow NAME_HOUSING_TYPE_House / apartment NAME_HOUSING_TYPE_Municipal apartment NAME_HOUSING_TYPE_Office apartment NAME_HOUSING_TYPE_Rented apartment NAME_HOUSING_TYPE_With parents WEEKDAY_APPR_PROCESS_START_x_MONDAY WEEKDAY_APPR_PROCESS_START_x_SATURDAY WEEKDAY_APPR_PROCESS_START_x_SUNDAY WEEKDAY_APPR_PROCESS_START_x_THURSDAY WEEKDAY_APPR_PROCESS_START_x_TUESDAY WEEKDAY_APPR_PROCESS_START_x_WEDNESDAY ORGANIZATION_TYPE_Agriculture ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Business Entity Type 1 ORGANIZATION_TYPE_Business Entity Type 2 ORGANIZATION_TYPE_Business Entity Type 3 ORGANIZATION_TYPE_Cleaning ORGANIZATION_TYPE_Construction ORGANIZATION_TYPE_Culture ORGANIZATION_TYPE_Electricity ORGANIZATION_TYPE_Emergency ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Hotel ORGANIZATION_TYPE_Housing ORGANIZATION_TYPE_Industry: type 1 ORGANIZATION_TYPE_Industry: type 10 ORGANIZATION_TYPE_Industry: type 11 ORGANIZATION_TYPE_Industry: type 12 ORGANIZATION_TYPE_Industry: type 13 ORGANIZATION_TYPE_Industry: type 2 ORGANIZATION_TYPE_Industry: type 3 ORGANIZATION_TYPE_Industry: type 4 ORGANIZATION_TYPE_Industry: type 5 ORGANIZATION_TYPE_Industry: type 6 ORGANIZATION_TYPE_Industry: type 7 ORGANIZATION_TYPE_Industry: type 8 ORGANIZATION_TYPE_Industry: type 9 ORGANIZATION_TYPE_Insurance ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Legal Services ORGANIZATION_TYPE_Medicine ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Mobile ORGANIZATION_TYPE_Other ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Postal ORGANIZATION_TYPE_Realtor ORGANIZATION_TYPE_Religion ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_School ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Security Ministries ORGANIZATION_TYPE_Self-employed ORGANIZATION_TYPE_Services ORGANIZATION_TYPE_Telecom ORGANIZATION_TYPE_Trade: type 1 ORGANIZATION_TYPE_Trade: type 2 ORGANIZATION_TYPE_Trade: type 3 ORGANIZATION_TYPE_Trade: type 4 ORGANIZATION_TYPE_Trade: type 5 ORGANIZATION_TYPE_Trade: type 6 ORGANIZATION_TYPE_Trade: type 7 ORGANIZATION_TYPE_Transport: type 1 ORGANIZATION_TYPE_Transport: type 2 ORGANIZATION_TYPE_Transport: type 3 ORGANIZATION_TYPE_Transport: type 4 ORGANIZATION_TYPE_University ORGANIZATION_TYPE_XNA NAME_CONTRACT_TYPE_y_Consumer loans NAME_CONTRACT_TYPE_y_Revolving loans WEEKDAY_APPR_PROCESS_START_y_MONDAY WEEKDAY_APPR_PROCESS_START_y_SATURDAY WEEKDAY_APPR_PROCESS_START_y_SUNDAY WEEKDAY_APPR_PROCESS_START_y_THURSDAY WEEKDAY_APPR_PROCESS_START_y_TUESDAY WEEKDAY_APPR_PROCESS_START_y_WEDNESDAY NAME_CASH_LOAN_PURPOSE_Business development NAME_CASH_LOAN_PURPOSE_Buying a garage NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land NAME_CASH_LOAN_PURPOSE_Buying a home NAME_CASH_LOAN_PURPOSE_Buying a new car NAME_CASH_LOAN_PURPOSE_Buying a used car NAME_CASH_LOAN_PURPOSE_Car repairs NAME_CASH_LOAN_PURPOSE_Education NAME_CASH_LOAN_PURPOSE_Everyday expenses NAME_CASH_LOAN_PURPOSE_Furniture NAME_CASH_LOAN_PURPOSE_Gasification / water supply NAME_CASH_LOAN_PURPOSE_Hobby NAME_CASH_LOAN_PURPOSE_Journey NAME_CASH_LOAN_PURPOSE_Medicine NAME_CASH_LOAN_PURPOSE_Money for a third person NAME_CASH_LOAN_PURPOSE_Other NAME_CASH_LOAN_PURPOSE_Payments on other loans NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment NAME_CASH_LOAN_PURPOSE_Refusal to name the goal NAME_CASH_LOAN_PURPOSE_Repairs NAME_CASH_LOAN_PURPOSE_Urgent needs NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday NAME_CASH_LOAN_PURPOSE_XAP NAME_CASH_LOAN_PURPOSE_XNA NAME_CONTRACT_STATUS_Canceled NAME_CONTRACT_STATUS_Refused NAME_CONTRACT_STATUS_Unused offer NAME_PAYMENT_TYPE_Cashless from the account of the employer NAME_PAYMENT_TYPE_Non-cash from your account NAME_PAYMENT_TYPE_XNA CODE_REJECT_REASON_HC CODE_REJECT_REASON_LIMIT CODE_REJECT_REASON_SCO CODE_REJECT_REASON_SCOFR CODE_REJECT_REASON_SYSTEM CODE_REJECT_REASON_VERIF CODE_REJECT_REASON_XAP CODE_REJECT_REASON_XNA NAME_TYPE_SUITE_y_Family NAME_TYPE_SUITE_y_Group of people NAME_TYPE_SUITE_y_Other_A NAME_TYPE_SUITE_y_Other_B NAME_TYPE_SUITE_y_Spouse, partner NAME_TYPE_SUITE_y_Unaccompanied NAME_CLIENT_TYPE_Refreshed NAME_CLIENT_TYPE_Repeater NAME_CLIENT_TYPE_XNA NAME_GOODS_CATEGORY_Animals NAME_GOODS_CATEGORY_Audio/Video NAME_GOODS_CATEGORY_Auto Accessories NAME_GOODS_CATEGORY_Clothing and Accessories NAME_GOODS_CATEGORY_Computers NAME_GOODS_CATEGORY_Construction Materials NAME_GOODS_CATEGORY_Consumer Electronics NAME_GOODS_CATEGORY_Direct Sales NAME_GOODS_CATEGORY_Education NAME_GOODS_CATEGORY_Fitness NAME_GOODS_CATEGORY_Furniture NAME_GOODS_CATEGORY_Gardening NAME_GOODS_CATEGORY_Homewares NAME_GOODS_CATEGORY_Insurance NAME_GOODS_CATEGORY_Jewelry NAME_GOODS_CATEGORY_Medical Supplies NAME_GOODS_CATEGORY_Medicine NAME_GOODS_CATEGORY_Mobile NAME_GOODS_CATEGORY_Office Appliances NAME_GOODS_CATEGORY_Other NAME_GOODS_CATEGORY_Photo / Cinema Equipment NAME_GOODS_CATEGORY_Sport and Leisure NAME_GOODS_CATEGORY_Tourism NAME_GOODS_CATEGORY_Vehicles NAME_GOODS_CATEGORY_Weapon NAME_GOODS_CATEGORY_XNA NAME_PORTFOLIO_Cars NAME_PORTFOLIO_Cash NAME_PORTFOLIO_POS NAME_PORTFOLIO_XNA NAME_PRODUCT_TYPE_walk-in NAME_PRODUCT_TYPE_x-sell CHANNEL_TYPE_Car dealer CHANNEL_TYPE_Channel of corporate sales CHANNEL_TYPE_Contact center CHANNEL_TYPE_Country-wide CHANNEL_TYPE_Credit and cash offices CHANNEL_TYPE_Regional / Local CHANNEL_TYPE_Stone NAME_SELLER_INDUSTRY_Clothing NAME_SELLER_INDUSTRY_Connectivity NAME_SELLER_INDUSTRY_Construction NAME_SELLER_INDUSTRY_Consumer electronics NAME_SELLER_INDUSTRY_Furniture NAME_SELLER_INDUSTRY_Industry NAME_SELLER_INDUSTRY_Jewelry NAME_SELLER_INDUSTRY_MLM partners NAME_SELLER_INDUSTRY_Tourism NAME_SELLER_INDUSTRY_XNA NAME_YIELD_GROUP_high NAME_YIELD_GROUP_low_action NAME_YIELD_GROUP_low_normal NAME_YIELD_GROUP_middle PRODUCT_COMBINATION_Card X-Sell PRODUCT_COMBINATION_Cash PRODUCT_COMBINATION_Cash Street: high PRODUCT_COMBINATION_Cash Street: low PRODUCT_COMBINATION_Cash Street: middle PRODUCT_COMBINATION_Cash X-Sell: high PRODUCT_COMBINATION_Cash X-Sell: low PRODUCT_COMBINATION_Cash X-Sell: middle PRODUCT_COMBINATION_POS household with interest PRODUCT_COMBINATION_POS household without interest PRODUCT_COMBINATION_POS industry with interest PRODUCT_COMBINATION_POS industry without interest PRODUCT_COMBINATION_POS mobile with interest PRODUCT_COMBINATION_POS mobile without interest PRODUCT_COMBINATION_POS other with interest PRODUCT_COMBINATION_POS others without interest
563940 -1.363375 -1.953734 -1.504036 -1.942035 -1.474372 -81.379969 -3.750036 -0.481628 1.405831 -2.038992 -2.606161 -3.606291 -1.675502 -0.545223 -1.042012 -0.540370 -1.012305 -1.345729 -0.955689 -0.984741 -1.142528 -0.584879 -1.107973 -0.138939 -1.228691 -0.598700 -0.616827 -0.821454 -3.813066 -13.556997 -15.944536 1.124656 -0.205477 -1.185544 -4.821163 -0.147423 -0.246378 -0.381180 -0.398163 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
186208 -1.363375 -1.953745 -1.504039 -1.942193 -1.474376 12.996496 -3.750038 -0.481645 1.405530 -2.039033 -2.606161 -3.991666 5.222424 -0.710306 3.465808 -0.708112 5.573290 -1.343140 -0.955689 -0.984741 -1.142528 -0.584879 -1.107973 -1.423148 -1.228691 -0.598698 -0.616825 -0.821455 -4.087730 -13.556997 -15.944536 1.122037 -0.204494 -1.185544 -4.821163 -0.147423 -0.246378 -0.381180 -0.398163 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
739774 -1.363375 -1.953725 -1.504039 -1.942211 -1.474376 -11.063907 -3.749922 -0.481645 1.405329 -2.038621 -2.606161 -3.509947 -1.675502 -0.875390 -1.042012 -0.875854 -1.012305 -1.343640 -0.955689 -0.984741 -1.142528 -0.584879 2.956800 -1.423148 -1.228705 -0.598698 -0.616825 -0.821455 -3.355293 -13.556997 -15.944536 1.123757 -0.205469 -1.173405 -4.821163 -0.147423 -0.246378 -0.381180 -0.398163 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
767186 -1.363375 -1.953748 -1.504041 -1.942228 -1.474378 -36.820993 -3.749963 -0.481628 1.405527 -2.038644 -2.606161 -3.991666 -1.675502 -0.710306 -1.042012 -0.708112 -1.012305 -1.345038 -0.955689 -0.984741 -1.142528 -0.584879 0.924413 -0.872773 -1.228680 -0.598699 -0.616826 -0.821455 -3.721512 -13.556997 -15.944536 1.123968 -0.204171 -1.221960 -4.821163 -0.147423 -0.246378 -0.381180 -0.398163 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
502320 0.629792 -1.953739 -1.504039 -1.942182 -1.474377 -11.063907 -3.750374 -0.481645 1.405275 -2.038224 -1.333421 -4.280697 5.222424 -0.875390 -1.042012 -0.875854 -1.012305 -1.345977 -0.955689 -0.984741 -1.142528 0.535865 -1.107973 -1.056231 -1.228736 -0.598697 -0.616824 -0.821454 -4.270840 -13.556997 -15.944536 1.124524 -0.205477 -0.967049 -4.821163 -0.147423 -0.246378 -0.381156 -0.398141 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
In [214]:
Target = round((sum(mergeddf['TARGET'])/len(mergeddf['TARGET'].index))*100,2)
print("We have almost {} %  Converted rate after successful data manipulation".format(Target))
We have almost 8.66 %  Converted rate after successful data manipulation
In [215]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
model = KNeighborsClassifier()
In [216]:
model.fit(X_train,y_train)
Out[216]:
KNeighborsClassifier()
In [217]:
predict_train = model.predict(X_train)
predict_train
Out[217]:
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
In [218]:
trainaccuracy = accuracy_score(y_train,predict_train)
print('accuracy_score on train dataset : ', trainaccuracy)
accuracy_score on train dataset :  0.9061224489795918
In [220]:
X_train.shape
Out[220]:
(4900, 291)
In [221]:
X_test.shape
Out[221]:
(2100, 291)
In [222]:
model.fit(X_train,y_train)
Out[222]:
KNeighborsClassifier()
In [223]:
predict_train = model.predict(X_train)
predict_train
Out[223]:
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
In [224]:
trainaccuracy = accuracy_score(y_train,predict_train)
print('accuracy_score on train dataset : ', trainaccuracy)
accuracy_score on train dataset :  0.9061224489795918
In [225]:
from sklearn import metrics
# Confusion matrix 
confusion = metrics.confusion_matrix(y_train, predict_train )
print(confusion)
[[4411   20]
 [ 440   29]]
In [226]:
TP = confusion[1,1] # true positive 
TN = confusion[0,0] # true negatives
FP = confusion[0,1] # false positives
FN = confusion[1,0] # false negatives
In [227]:
# Let's see the sensitivity of our model
trainsensitivity= TP / float(TP+FN)
trainsensitivity
Out[227]:
0.06183368869936034
In [228]:
# Let us calculate specificity
trainspecificity= TN / float(TN+FP)
trainspecificity
Out[228]:
0.9954863461972466
In [229]:
# Calculate false postive rate - predicting Defaulted when customer does not have Defaulted
print(FP/ float(TN+FP))
0.0045136538027533285
In [230]:
# Positive predictive value 
print (TP / float(TP+FP))
0.5918367346938775
In [231]:
print(TN / float(TN+ FN))
0.909297052154195
In [232]:
def draw_roc( actual, probs ):
    fpr, tpr, thresholds = metrics.roc_curve( actual, probs,
                                              drop_intermediate = False )
    auc_score = metrics.roc_auc_score( actual, probs )
    plt.figure(figsize=(5, 5))
    plt.plot( fpr, tpr, label='ROC curve (area = %0.2f)' % auc_score )
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate or [1 - True Negative Rate]')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

    return None
In [233]:
draw_roc(y_train,predict_train)
In [234]:
from sklearn.metrics import precision_score, recall_score
precision_score(y_train,predict_train)
Out[234]:
0.5918367346938775
In [235]:
recall_score(y_train,predict_train)  
Out[235]:
0.06183368869936034
In [236]:
# predict the target on the test dataset
predict_test = model.predict(X_test)
print('Target on test data\n\n',predict_test)
Target on test data

 [0 0 0 ... 0 0 0]
In [237]:
confusion2 = metrics.confusion_matrix(y_test, predict_test )
print(confusion2)
[[1928    0]
 [ 172    0]]
In [238]:
TP = confusion2[1,1] # true positive 
TN = confusion2[0,0] # true negatives
FP = confusion2[0,1] # false positives
FN = confusion2[1,0] # false negatives
In [239]:
# Let's check the overall accuracy.
testaccuracy= accuracy_score(y_test,predict_test)
testaccuracy
Out[239]:
0.9180952380952381
In [240]:
# Let's see the sensitivity of our lmodel
testsensitivity=TP / float(TP+FN)
testsensitivity
Out[240]:
0.0
In [241]:
# Let us calculate specificity
testspecificity= TN / float(TN+FP)
testspecificity
Out[241]:
1.0
In [242]:
# Let us compare the values obtained for Train & Test:
print("Train Data Accuracy    :{} %".format(round((trainaccuracy*100),2)))
print("Train Data Sensitivity :{} %".format(round((trainsensitivity*100),2)))
print("Train Data Specificity :{} %".format(round((trainspecificity*100),2)))
print("Test Data Accuracy     :{} %".format(round((testaccuracy*100),2)))
print("Test Data Sensitivity  :{} %".format(round((testsensitivity*100),2)))
print("Test Data Specificity  :{} %".format(round((testspecificity*100),2)))
Train Data Accuracy    :90.61 %
Train Data Sensitivity :6.18 %
Train Data Specificity :99.55 %
Test Data Accuracy     :91.81 %
Test Data Sensitivity  :0.0 %
Test Data Specificity  :100.0 %
In [ ]: